Re: [PATCH][ARM/AArch64 Testsuite] Add float16 lane_indices tests (was: Re: [PATCH 9/15][AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate)

2015-08-17 Thread James Greenhalgh
On Tue, Aug 04, 2015 at 12:07:21PM +0100, Alan Lawrence wrote:
 James Greenhalgh wrote:
  Hi Alan,
  
  The arm_neon.h portion of this patch does not apply after Charles' recent
  changes. Could you please rebase and resubmit the patch for review?
  
  Thanks,
  James
 
 These are straightforward copies of the corresponding uint16 tests, with
 appropriate substitutions uint->float and u16->f16. As per the existing tests,
 these are xfailed on ARM targets, pending further work on PR/63870.

OK.

Thanks,
James

 
 Cross-tested on aarch64-none-elf.
 
 gcc/testsuite/ChangeLog:
 
   * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_indices_1.c: New.
   * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_indices_1.c: New.
   * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_indices_1.c: New.
   * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_indices_1.c: New.
   * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_indices_1.c: New.
   * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_indices_1.c: New.
   * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_indices_1.c: New.
   * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_indices_1.c: New.
   * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_indices_1.c: New.
   * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_indices_1.c: New.
   * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_indices_1.c: New.
   * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_indices_1.c: New.




Re: [PATCH 9/15][AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate

2015-08-17 Thread James Greenhalgh
On Thu, Aug 06, 2015 at 05:28:34PM +0100, Alan Lawrence wrote:
 Alan Lawrence wrote:
   James Greenhalgh wrote:
   Hi Alan,
  
   The arm_neon.h portion of this patch does not apply after Charles' recent
   changes. Could you please rebase and resubmit the patch for review?
  
   Thanks,
   James
  
   Ah, indeed, thanks. Here's a rebased version, using Charles' new versions of
   __(LD|ST)[234]_LANE_FUNC. I'll follow with a patch adding corresponding
   lane_f16_indices tests in a separate email.
  
   (Changelog as before)
  
   Bootstrapped + check-gcc on aarch64-none-linux-gnu.
 
 
 Here, in fact. gcc/ChangeLog:
 
   * config/aarch64/aarch64.c (aarch64_split_simd_combine): Add V4HFmode.
   * config/aarch64/aarch64-builtins.c (VAR13, VAR14): New.
   (aarch64_scalar_builtin_types, aarch64_init_simd_builtin_scalar_types):
   Add __builtin_aarch64_simd_hf.
   * config/aarch64/arm_neon.h (float16x4x2_t, float16x8x2_t,
   float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t,
   vcombine_f16, vst2_lane_f16, vst2q_lane_f16, vst3_lane_f16,
   vst3q_lane_f16, vst4_lane_f16, vst4q_lane_f16, vld2_f16, vld2q_f16,
   vld3_f16, vld3q_f16, vld4_f16, vld4q_f16, vld2_dup_f16, vld2q_dup_f16,
   vld3_dup_f16, vld3q_dup_f16, vld4_dup_f16, vld4q_dup_f16,
   vld2_lane_f16, vld2q_lane_f16, vld3_lane_f16, vld3q_lane_f16,
   vld4_lane_f16, vld4q_lane_f16, vst2_f16, vst2q_f16, vst3_f16,
   vst3q_f16, vst4_f16, vst4q_f16, vcreate_f16): New.
 
   * config/aarch64/iterators.md (VALLDIF, Vtype, Vetype, Vbtype,
   V_cmp_result, v_cmp_result): Add cases for V4HF and V8HF.
   (VDC, Vdbl): Add V4HF.
 
 gcc/testsuite/ChangeLog:
 
   * gcc.target/aarch64/vldN_1.c: Add float16x4_t and float16x8_t cases.
   * gcc.target/aarch64/vldN_dup_1.c: Likewise.
   * gcc.target/aarch64/vldN_lane_1.c: Likewise.
  (main): Update orig_data to avoid float16 NaN on big-endian.

OK, but please clean up the stray blank line (marked "Here." in the arm_neon.h hunk below).

   arm_neon.h

 @@ -15974,6 +16086,19 @@ vld4q_u64 (const uint64_t * __a)
return ret;
  }
  
 +__extension__ static __inline float16x8x4_t __attribute__ 
 ((__always_inline__))
 +vld4q_f16 (const float16_t * __a)
 +{
 +  float16x8x4_t ret;
 +  __builtin_aarch64_simd_xi __o;
 +  __o = __builtin_aarch64_ld4v8hf (__a);
 +  ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0);
 +  ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1);
 +  ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2);
 +  ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3);
 +  return ret;
 +}
 +
  __extension__ static __inline float32x4x4_t __attribute__ 
 ((__always_inline__))
  vld4q_f32 (const float32_t * __a)
  {
 @@ -16035,6 +16160,18 @@ vld2_dup_s32 (const int32_t * __a)
return ret;
  }
  
 +

Here.

 +__extension__ static __inline float16x4x2_t __attribute__ 
 ((__always_inline__))
 +vld2_dup_f16 (const float16_t * __a)
 +{
 +  float16x4x2_t ret;
 +  __builtin_aarch64_simd_oi __o;
 +  __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
 +  ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
 +  ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1);
 +  return ret;
 +}
 +
  __extension__ static __inline float32x2x2_t __attribute__ 
 ((__always_inline__))
  vld2_dup_f32 (const float32_t * __a)
  {

Thanks,
James


Re: [PATCH 9/15][AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate

2015-08-06 Thread Alan Lawrence

Alan Lawrence wrote:
 James Greenhalgh wrote:
 Hi Alan,

 The arm_neon.h portion of this patch does not apply after Charles' recent
 changes. Could you please rebase and resubmit the patch for review?

 Thanks,
 James

 Ah, indeed, thanks. Here's a rebased version, using Charles' new versions of
 __(LD|ST)[234]_LANE_FUNC. I'll follow with a patch adding corresponding
 lane_f16_indices tests in a separate email.

 (Changelog as before)

 Bootstrapped + check-gcc on aarch64-none-linux-gnu.


Here, in fact. gcc/ChangeLog:

* config/aarch64/aarch64.c (aarch64_split_simd_combine): Add V4HFmode.
* config/aarch64/aarch64-builtins.c (VAR13, VAR14): New.
(aarch64_scalar_builtin_types, aarch64_init_simd_builtin_scalar_types):
Add __builtin_aarch64_simd_hf.
* config/aarch64/arm_neon.h (float16x4x2_t, float16x8x2_t,
float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t,
vcombine_f16, vst2_lane_f16, vst2q_lane_f16, vst3_lane_f16,
vst3q_lane_f16, vst4_lane_f16, vst4q_lane_f16, vld2_f16, vld2q_f16,
vld3_f16, vld3q_f16, vld4_f16, vld4q_f16, vld2_dup_f16, vld2q_dup_f16,
vld3_dup_f16, vld3q_dup_f16, vld4_dup_f16, vld4q_dup_f16,
vld2_lane_f16, vld2q_lane_f16, vld3_lane_f16, vld3q_lane_f16,
vld4_lane_f16, vld4q_lane_f16, vst2_f16, vst2q_f16, vst3_f16,
vst3q_f16, vst4_f16, vst4q_f16, vcreate_f16): New.

* config/aarch64/iterators.md (VALLDIF, Vtype, Vetype, Vbtype,
V_cmp_result, v_cmp_result): Add cases for V4HF and V8HF.
(VDC, Vdbl): Add V4HF.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/vldN_1.c: Add float16x4_t and float16x8_t cases.
* gcc.target/aarch64/vldN_dup_1.c: Likewise.
* gcc.target/aarch64/vldN_lane_1.c: Likewise.
(main): Update orig_data to avoid float16 NaN on big-endian.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 2394efdb483e1128d2990852871ab4abfed8bdfc..bcfc2c0651c344ead7d6c40656b3b34fc4d724a0 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -297,6 +297,12 @@ aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
   VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
   VAR1 (T, N, MAP, L)
+#define VAR13(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
+  VAR12 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
+  VAR1 (T, N, MAP, M)
+#define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
+  VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
+  VAR1 (T, X, MAP, N)
 
 #include "aarch64-builtin-iterators.h"
 
@@ -374,6 +380,7 @@ const char *aarch64_scalar_builtin_types[] = {
  "__builtin_aarch64_simd_qi",
  "__builtin_aarch64_simd_hi",
  "__builtin_aarch64_simd_si",
+  "__builtin_aarch64_simd_hf",
  "__builtin_aarch64_simd_sf",
  "__builtin_aarch64_simd_di",
  "__builtin_aarch64_simd_df",
@@ -661,6 +668,8 @@ aarch64_init_simd_builtin_scalar_types (void)
 	 __builtin_aarch64_simd_qi);
   (*lang_hooks.types.register_builtin_type) (intHI_type_node,
 	 __builtin_aarch64_simd_hi);
+  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
+	 __builtin_aarch64_simd_hf);
   (*lang_hooks.types.register_builtin_type) (intSI_type_node,
 	 __builtin_aarch64_simd_si);
   (*lang_hooks.types.register_builtin_type) (float_type_node,
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0c40e8c6e42a3685e4865ab54f26a4883821d9d5..bf98637103d47156e5d340b3b44663524916fdaa 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1219,6 +1219,9 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
 	case V2SImode:
 	  gen = gen_aarch64_simd_combinev2si;
 	  break;
+	case V4HFmode:
+	  gen = gen_aarch64_simd_combinev4hf;
+	  break;
 	case V2SFmode:
 	  gen = gen_aarch64_simd_combinev2sf;
 	  break;
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 9654584f966b192119839d8cdd30513b0d4f8f4a..f2dc95a304b53807c1b978c834224fec738a70fd 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -153,6 +153,16 @@ typedef struct uint64x2x2_t
   uint64x2_t val[2];
 } uint64x2x2_t;
 
+typedef struct float16x4x2_t
+{
+  float16x4_t val[2];
+} float16x4x2_t;
+
+typedef struct float16x8x2_t
+{
+  float16x8_t val[2];
+} float16x8x2_t;
+
 typedef struct float32x2x2_t
 {
   float32x2_t val[2];
@@ -273,6 +283,16 @@ typedef struct uint64x2x3_t
   uint64x2_t val[3];
 } uint64x2x3_t;
 
+typedef struct float16x4x3_t
+{
+  float16x4_t val[3];
+} float16x4x3_t;
+
+typedef struct float16x8x3_t
+{
+  float16x8_t val[3];
+} float16x8x3_t;
+
 typedef struct float32x2x3_t
 {
   float32x2_t val[3];
@@ -393,6 +413,16 @@ typedef struct uint64x2x4_t
   uint64x2_t val[4];
 } uint64x2x4_t;
 
+typedef struct float16x4x4_t
+{
+  float16x4_t val[4];
+} float16x4x4_t;
+

Re: [PATCH 9/15][AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate

2015-08-04 Thread Alan Lawrence

James Greenhalgh wrote:

On Tue, Jul 28, 2015 at 12:25:55PM +0100, Alan Lawrence wrote:

gcc/ChangeLog:

* config/aarch64/aarch64.c (aarch64_split_simd_combine): Add V4HFmode.
* config/aarch64/aarch64-builtins.c (VAR13, VAR14): New.
(aarch64_scalar_builtin_types, aarch64_init_simd_builtin_scalar_types):
Add __builtin_aarch64_simd_hf.
* config/aarch64/arm_neon.h (float16x4x2_t, float16x8x2_t,
float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t,
vcombine_f16, vst2_lane_f16, vst2q_lane_f16, vst3_lane_f16,
vst3q_lane_f16, vst4_lane_f16, vst4q_lane_f16, vld2_f16, vld2q_f16,
vld3_f16, vld3q_f16, vld4_f16, vld4q_f16, vld2_dup_f16, vld2q_dup_f16,
vld3_dup_f16, vld3q_dup_f16, vld4_dup_f16, vld4q_dup_f16,
vld2_lane_f16, vld2q_lane_f16, vld3_lane_f16, vld3q_lane_f16,
vld4_lane_f16, vld4q_lane_f16, vst2_f16, vst2q_f16, vst3_f16,
vst3q_f16, vst4_f16, vst4q_f16, vcreate_f16): New.

* config/aarch64/iterators.md (VALLDIF, Vtype, Vetype, Vbtype,
V_cmp_result, v_cmp_result): Add cases for V4HF and V8HF.
(VDC, Vdbl): Add V4HF.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/vldN_1.c: Add float16x4_t and float16x8_t cases.
* gcc.target/aarch64/vldN_dup_1.c: Likewise.
* gcc.target/aarch64/vldN_lane_1.c: Likewise.


Hi Alan,

The arm_neon.h portion of this patch does not apply after Charles' recent
changes. Could you please rebase and resubmit the patch for review?

Thanks,
James


Ah, indeed, thanks. Here's a rebased version, using Charles' new versions of 
__(LD|ST)[234]_LANE_FUNC. I'll follow with a patch adding corresponding 
lane_f16_indices tests in a separate email.


(Changelog as before)

Bootstrapped + check-gcc on aarch64-none-linux-gnu.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 800f6e1ffcd358aa22ceecbc460bc1dcac4acd9e..2394efdb483e1128d2990852871ab4abfed8bdfc 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -61,6 +61,7 @@
 
 #define v8qi_UP  V8QImode
 #define v4hi_UP  V4HImode
+#define v4hf_UP  V4HFmode
 #define v2si_UP  V2SImode
 #define v2sf_UP  V2SFmode
 #define v1df_UP  V1DFmode
@@ -68,6 +69,7 @@
 #define df_UP    DFmode
 #define v16qi_UP V16QImode
 #define v8hi_UP  V8HImode
+#define v8hf_UP  V8HFmode
 #define v4si_UP  V4SImode
 #define v4sf_UP  V4SFmode
 #define v2di_UP  V2DImode
@@ -520,6 +522,8 @@ aarch64_simd_builtin_std_type (enum machine_mode mode,
   return aarch64_simd_intCI_type_node;
 case XImode:
   return aarch64_simd_intXI_type_node;
+case HFmode:
+  return aarch64_fp16_type_node;
 case SFmode:
   return float_type_node;
 case DFmode:
@@ -604,6 +608,8 @@ aarch64_init_simd_builtin_types (void)
   aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;
 
   /* Continue with standard types.  */
+  aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
+  aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
   aarch64_simd_types[Float32x2_t].eltype = float_type_node;
   aarch64_simd_types[Float32x4_t].eltype = float_type_node;
   aarch64_simd_types[Float64x1_t].eltype = double_type_node;
diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def
index bb54e56ce63c040dbfe69e2249e642d2c43fd0af..ea219b72ff9ac406c2439cda002617e710b2966c 100644
--- a/gcc/config/aarch64/aarch64-simd-builtin-types.def
+++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def
@@ -44,6 +44,8 @@
   ENTRY (Poly16x8_t, V8HI, poly, 12)
   ENTRY (Poly64x1_t, DI, poly, 12)
   ENTRY (Poly64x2_t, V2DI, poly, 12)
+  ENTRY (Float16x4_t, V4HF, none, 13)
+  ENTRY (Float16x8_t, V8HF, none, 13)
   ENTRY (Float32x2_t, V2SF, none, 13)
   ENTRY (Float32x4_t, V4SF, none, 13)
   ENTRY (Float64x1_t, V1DF, none, 13)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index d0f298a1f075f51d4d47c6f364860dd1d0a545e0..39ff34e16d8bb79bcd44a4f40d214963996968af 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -367,11 +367,11 @@
   VAR1 (UNOP, float_extend_lo_, 0, v2df)
   VAR1 (UNOP, float_truncate_lo_, 0, v2sf)
 
-  /* Implemented by aarch64_ld1<VALL:mode>.  */
-  BUILTIN_VALL (LOAD1, ld1, 0)
+  /* Implemented by aarch64_ld1<VALL_F16:mode>.  */
+  BUILTIN_VALL_F16 (LOAD1, ld1, 0)
 
-  /* Implemented by aarch64_st1<VALL:mode>.  */
-  BUILTIN_VALL (STORE1, st1, 0)
+  /* Implemented by aarch64_st1<VALL_F16:mode>.  */
+  BUILTIN_VALL_F16 (STORE1, st1, 0)
 
   /* Implemented by fma<mode>4.  */
   BUILTIN_VDQF (TERNOP, fma, 4)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 97774181fab11b846d40c3981e2d1f9ea4891337..cab712d7d18dc8a9bebf2b25608b5b4490a07b45 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ 

[PATCH][ARM/AArch64 Testsuite] Add float16 lane_indices tests (was: Re: [PATCH 9/15][AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate)

2015-08-04 Thread Alan Lawrence

James Greenhalgh wrote:

Hi Alan,

The arm_neon.h portion of this patch does not apply after Charles' recent
changes. Could you please rebase and resubmit the patch for review?

Thanks,
James


These are straightforward copies of the corresponding uint16 tests, with 
appropriate substitutions uint-float and u16-f16. As per the existing tests, 
these are xfailed on ARM targets, pending further work on PR/63870.


Cross-tested on aarch64-none-elf.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f16_indices_1.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f16_indices_1.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f16_indices_1.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f16_indices_1.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f16_indices_1.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f16_indices_1.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f16_indices_1.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f16_indices_1.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f16_indices_1.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f16_indices_1.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f16_indices_1.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f16_indices_1.c: New.
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f16_indices_1.c
new file mode 100644
index ..2174d6eaa8ff1a1d28261b5f1ef3d137d206070d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f16_indices_1.c
@@ -0,0 +1,16 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+float16x4x2_t
+f_vld2_lane_f16 (float16_t * p, float16x4x2_t v)
+{
+  float16x4x2_t res;
+  /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+  res = vld2_lane_f16 (p, v, 4);
+  /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+  res = vld2_lane_f16 (p, v, -1);
+  return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f16_indices_1.c
new file mode 100644
index ..83ae82c82423b9fbcb98c04d0b26ca69db7a5faa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f16_indices_1.c
@@ -0,0 +1,16 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+float16x8x2_t
+f_vld2q_lane_f16 (float16_t * p, float16x8x2_t v)
+{
+  float16x8x2_t res;
+  /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+  res = vld2q_lane_f16 (p, v, 8);
+  /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+  res = vld2q_lane_f16 (p, v, -1);
+  return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f16_indices_1.c
new file mode 100644
index ..21b7861ba7549ffb692effad2c4e5194c67f3a3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f16_indices_1.c
@@ -0,0 +1,16 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+float16x4x3_t
+f_vld3_lane_f16 (float16_t * p, float16x4x3_t v)
+{
+  float16x4x3_t res;
+  /* { dg-error "lane 4 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+  res = vld3_lane_f16 (p, v, 4);
+  /* { dg-error "lane -1 out of range 0 - 3" "" { xfail arm*-*-* } 0 } */
+  res = vld3_lane_f16 (p, v, -1);
+  return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f16_indices_1.c
new file mode 100644
index ..95ec3913eef77afdf8ce1a7d7a95ddfa3bdf9fc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f16_indices_1.c
@@ -0,0 +1,16 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-excess-errors "" { xfail arm*-*-* } } */
+
+float16x8x3_t
+f_vld3q_lane_f16 (float16_t * p, float16x8x3_t v)
+{
+  float16x8x3_t res;
+  /* { dg-error "lane 8 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+  res = vld3q_lane_f16 (p, v, 8);
+  /* { dg-error "lane -1 out of range 0 - 7" "" { xfail arm*-*-* } 0 } */
+  res = vld3q_lane_f16 (p, v, -1);
+  return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f16_indices_1.c 

Re: [PATCH 9/15][AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate

2015-08-04 Thread Alan Lawrence

Attachment has gone AWOL here too. Sorry for the bother; please ignore the
previous message.

Alan Lawrence wrote:

James Greenhalgh wrote:

On Tue, Jul 28, 2015 at 12:25:55PM +0100, Alan Lawrence wrote:

gcc/ChangeLog:

* config/aarch64/aarch64.c (aarch64_split_simd_combine): Add V4HFmode.
* config/aarch64/aarch64-builtins.c (VAR13, VAR14): New.
(aarch64_scalar_builtin_types, aarch64_init_simd_builtin_scalar_types):
Add __builtin_aarch64_simd_hf.
* config/aarch64/arm_neon.h (float16x4x2_t, float16x8x2_t,
float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t,
vcombine_f16, vst2_lane_f16, vst2q_lane_f16, vst3_lane_f16,
vst3q_lane_f16, vst4_lane_f16, vst4q_lane_f16, vld2_f16, vld2q_f16,
vld3_f16, vld3q_f16, vld4_f16, vld4q_f16, vld2_dup_f16, vld2q_dup_f16,
vld3_dup_f16, vld3q_dup_f16, vld4_dup_f16, vld4q_dup_f16,
vld2_lane_f16, vld2q_lane_f16, vld3_lane_f16, vld3q_lane_f16,
vld4_lane_f16, vld4q_lane_f16, vst2_f16, vst2q_f16, vst3_f16,
vst3q_f16, vst4_f16, vst4q_f16, vcreate_f16): New.

* config/aarch64/iterators.md (VALLDIF, Vtype, Vetype, Vbtype,
V_cmp_result, v_cmp_result): Add cases for V4HF and V8HF.
(VDC, Vdbl): Add V4HF.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/vldN_1.c: Add float16x4_t and float16x8_t cases.
* gcc.target/aarch64/vldN_dup_1.c: Likewise.
* gcc.target/aarch64/vldN_lane_1.c: Likewise.

Hi Alan,

The arm_neon.h portion of this patch does not apply after Charles' recent
changes. Could you please rebase and resubmit the patch for review?

Thanks,
James


Ah, indeed, thanks. Here's a rebased version, using Charles' new versions of 
__(LD|ST)[234]_LANE_FUNC. I'll follow with a patch adding corresponding 
lane_f16_indices tests in a separate email.


(Changelog as before)

Bootstrapped + check-gcc on aarch64-none-linux-gnu.




Re: [PATCH 9/15][AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate

2015-07-29 Thread James Greenhalgh
On Tue, Jul 28, 2015 at 12:25:55PM +0100, Alan Lawrence wrote:
 gcc/ChangeLog:
 
   * config/aarch64/aarch64.c (aarch64_split_simd_combine): Add V4HFmode.
   * config/aarch64/aarch64-builtins.c (VAR13, VAR14): New.
   (aarch64_scalar_builtin_types, aarch64_init_simd_builtin_scalar_types):
   Add __builtin_aarch64_simd_hf.
   * config/aarch64/arm_neon.h (float16x4x2_t, float16x8x2_t,
   float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t,
   vcombine_f16, vst2_lane_f16, vst2q_lane_f16, vst3_lane_f16,
   vst3q_lane_f16, vst4_lane_f16, vst4q_lane_f16, vld2_f16, vld2q_f16,
   vld3_f16, vld3q_f16, vld4_f16, vld4q_f16, vld2_dup_f16, vld2q_dup_f16,
   vld3_dup_f16, vld3q_dup_f16, vld4_dup_f16, vld4q_dup_f16,
   vld2_lane_f16, vld2q_lane_f16, vld3_lane_f16, vld3q_lane_f16,
   vld4_lane_f16, vld4q_lane_f16, vst2_f16, vst2q_f16, vst3_f16,
   vst3q_f16, vst4_f16, vst4q_f16, vcreate_f16): New.
 
   * config/aarch64/iterators.md (VALLDIF, Vtype, Vetype, Vbtype,
   V_cmp_result, v_cmp_result): Add cases for V4HF and V8HF.
   (VDC, Vdbl): Add V4HF.
 
 gcc/testsuite/ChangeLog:
 
   * gcc.target/aarch64/vldN_1.c: Add float16x4_t and float16x8_t cases.
   * gcc.target/aarch64/vldN_dup_1.c: Likewise.
   * gcc.target/aarch64/vldN_lane_1.c: Likewise.

Hi Alan,

The arm_neon.h portion of this patch does not apply after Charles' recent
changes. Could you please rebase and resubmit the patch for review?

Thanks,
James

 @@ -1,6 +10044,8 @@ vst2_lane_ ## funcsuffix (ptrtype *__ptr, 
  \
__ptr, __o, __c);   \
  }
  
 +__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v8hf, hf, f16,
 +  float16x8_t)
  __ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
float32x4_t)
  __ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,

Hunks like this fail to apply, as the macro should now look like this (from
config/aarch64/arm_neon.h):


#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode,\
qmode, ptr_mode, funcsuffix, signedtype) \

__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32,
 float32x4_t)

So I would expect the lines you add to look something like:

 +__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16,
 +  float16x8_t)

Thanks,
James