Re: [AARCH64] implements neon vld1_*_x2 intrinsics

2018-01-03 Thread Christophe Lyon
Hi Kugan,


On 15 November 2017 at 12:23, James Greenhalgh  wrote:
> On Wed, Nov 15, 2017 at 09:58:28AM +, Kyrill Tkachov wrote:
>> Hi Kugan,
>>
>> On 07/11/17 04:10, Kugan Vivekanandarajah wrote:
>> > Hi,
>> >
>> > Attached patch implements the  vld1_*_x2 intrinsics as defined by the
>> > neon document.
>> >
>> > Bootstrap for the latest patch is ongoing on aarch64-linux-gnu. Is
>> > this OK for trunk if no regressions?
>> >
>>
>> This looks mostly ok to me (though I cannot approve) modulo a couple of
>> minor type issues below.
>
> Thanks for the review Kyrill!
>
> I'm happy to trust Kyrill's knowledge of the back-end here, so the patch
> is OK with the changes Kyrill requested.
>
> Thanks for the patch!
>
> James
>
>> > gcc/ChangeLog:
>> >
>> > 2017-11-06  Kugan Vivekanandarajah 
>> >
>> > * config/aarch64/aarch64-simd.md (aarch64_ld1x2): New.
>> > (aarch64_ld1x2): Likewise.
>> > (aarch64_simd_ld1_x2): Likewise.
>> > (aarch64_simd_ld1_x2): Likewise.
>> > * config/aarch64/arm_neon.h (vld1_u8_x2): New.
>> > (vld1_s8_x2): Likewise.
>> > (vld1_u16_x2): Likewise.
>> > (vld1_s16_x2): Likewise.
>> > (vld1_u32_x2): Likewise.
>> > (vld1_s32_x2): Likewise.
>> > (vld1_u64_x2): Likewise.
>> > (vld1_s64_x2): Likewise.
>> > (vld1_f16_x2): Likewise.
>> > (vld1_f32_x2): Likewise.
>> > (vld1_f64_x2): Likewise.
>> > (vld1_p8_x2): Likewise.
>> > (vld1_p16_x2): Likewise.
>> > (vld1_p64_x2): Likewise.
>> > (vld1q_u8_x2): Likewise.
>> > (vld1q_s8_x2): Likewise.
>> > (vld1q_u16_x2): Likewise.
>> > (vld1q_s16_x2): Likewise.
>> > (vld1q_u32_x2): Likewise.
>> > (vld1q_s32_x2): Likewise.
>> > (vld1q_u64_x2): Likewise.
>> > (vld1q_s64_x2): Likewise.
>> > (vld1q_f16_x2): Likewise.
>> > (vld1q_f32_x2): Likewise.
>> > (vld1q_f64_x2): Likewise.
>> > (vld1q_p8_x2): Likewise.
>> > (vld1q_p16_x2): Likewise.
>> > (vld1q_p64_x2): Likewise.
>> >
>> > gcc/testsuite/ChangeLog:
>> >
>> > 2017-11-06  Kugan Vivekanandarajah 
>> >
>> > * gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: New test.
>>

Sorry for not seeing this before you committed this patch, but the new
test fails to compile on arm targets.
Can you add the proper guard, as there is in other tests in the same dir?

Other question: why do you force -O3? The harness iterates on O0, O1, 

Thanks,

Christophe


>> +__extension__ extern __inline int8x8x2_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vld1_s8_x2 (const uint8_t *__a)
>>
>> This should be "const int8_t *"
>>
>>   +{
>> +  int8x8x2_t ret;
>> +  __builtin_aarch64_simd_oi __o;
>> +  __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) 
>> __a);
>> +  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
>> +  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
>> +  return ret;
>> +}
>>
>> ...
>>
>> +__extension__ extern __inline int32x2x2_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vld1_s32_x2 (const uint32_t *__a)
>>
>> Likewise, this should be "const int32_t *"
>>
>> +{
>> +  int32x2x2_t ret;
>> +  __builtin_aarch64_simd_oi __o;
>> +  __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) 
>> __a);
>> +  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
>> +  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
>> +  return ret;
>> +}
>> +
>>
>>


Re: [AARCH64] implements neon vld1_*_x2 intrinsics

2017-11-15 Thread James Greenhalgh
On Wed, Nov 15, 2017 at 09:58:28AM +, Kyrill Tkachov wrote:
> Hi Kugan,
> 
> On 07/11/17 04:10, Kugan Vivekanandarajah wrote:
> > Hi,
> >
> > Attached patch implements the  vld1_*_x2 intrinsics as defined by the
> > neon document.
> >
> > Bootstrap for the latest patch is ongoing on aarch64-linux-gnu. Is
> > this OK for trunk if no regressions?
> >
> 
> This looks mostly ok to me (though I cannot approve) modulo a couple of 
> minor type issues below.

Thanks for the review Kyrill!

I'm happy to trust Kyrill's knowledge of the back-end here, so the patch
is OK with the changes Kyrill requested.

Thanks for the patch!

James

> > gcc/ChangeLog:
> >
> > 2017-11-06  Kugan Vivekanandarajah 
> >
> > * config/aarch64/aarch64-simd.md (aarch64_ld1x2): New.
> > (aarch64_ld1x2): Likewise.
> > (aarch64_simd_ld1_x2): Likewise.
> > (aarch64_simd_ld1_x2): Likewise.
> > * config/aarch64/arm_neon.h (vld1_u8_x2): New.
> > (vld1_s8_x2): Likewise.
> > (vld1_u16_x2): Likewise.
> > (vld1_s16_x2): Likewise.
> > (vld1_u32_x2): Likewise.
> > (vld1_s32_x2): Likewise.
> > (vld1_u64_x2): Likewise.
> > (vld1_s64_x2): Likewise.
> > (vld1_f16_x2): Likewise.
> > (vld1_f32_x2): Likewise.
> > (vld1_f64_x2): Likewise.
> > (vld1_p8_x2): Likewise.
> > (vld1_p16_x2): Likewise.
> > (vld1_p64_x2): Likewise.
> > (vld1q_u8_x2): Likewise.
> > (vld1q_s8_x2): Likewise.
> > (vld1q_u16_x2): Likewise.
> > (vld1q_s16_x2): Likewise.
> > (vld1q_u32_x2): Likewise.
> > (vld1q_s32_x2): Likewise.
> > (vld1q_u64_x2): Likewise.
> > (vld1q_s64_x2): Likewise.
> > (vld1q_f16_x2): Likewise.
> > (vld1q_f32_x2): Likewise.
> > (vld1q_f64_x2): Likewise.
> > (vld1q_p8_x2): Likewise.
> > (vld1q_p16_x2): Likewise.
> > (vld1q_p64_x2): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 2017-11-06  Kugan Vivekanandarajah 
> >
> > * gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: New test.
> 
> +__extension__ extern __inline int8x8x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vld1_s8_x2 (const uint8_t *__a)
> 
> This should be "const int8_t *"
> 
>   +{
> +  int8x8x2_t ret;
> +  __builtin_aarch64_simd_oi __o;
> +  __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) 
> __a);
> +  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
> +  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
> +  return ret;
> +}
> 
> ...
> 
> +__extension__ extern __inline int32x2x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vld1_s32_x2 (const uint32_t *__a)
> 
> Likewise, this should be "const int32_t *"
> 
> +{
> +  int32x2x2_t ret;
> +  __builtin_aarch64_simd_oi __o;
> +  __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) 
> __a);
> +  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
> +  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
> +  return ret;
> +}
> +
> 
> 


Re: [AARCH64] implements neon vld1_*_x2 intrinsics

2017-11-15 Thread Kyrill Tkachov

Hi Kugan,

On 07/11/17 04:10, Kugan Vivekanandarajah wrote:

Hi,

Attached patch implements the  vld1_*_x2 intrinsics as defined by the
neon document.

Bootstrap for the latest patch is ongoing on aarch64-linux-gnu. Is
this OK for trunk if no regressions?



This looks mostly ok to me (though I cannot approve) modulo a couple of 
minor type issues below.


Thanks,
Kyrill


Thanks,
Kugan

gcc/ChangeLog:

2017-11-06  Kugan Vivekanandarajah 

* config/aarch64/aarch64-simd.md (aarch64_ld1x2): New.
(aarch64_ld1x2): Likewise.
(aarch64_simd_ld1_x2): Likewise.
(aarch64_simd_ld1_x2): Likewise.
* config/aarch64/arm_neon.h (vld1_u8_x2): New.
(vld1_s8_x2): Likewise.
(vld1_u16_x2): Likewise.
(vld1_s16_x2): Likewise.
(vld1_u32_x2): Likewise.
(vld1_s32_x2): Likewise.
(vld1_u64_x2): Likewise.
(vld1_s64_x2): Likewise.
(vld1_f16_x2): Likewise.
(vld1_f32_x2): Likewise.
(vld1_f64_x2): Likewise.
(vld1_p8_x2): Likewise.
(vld1_p16_x2): Likewise.
(vld1_p64_x2): Likewise.
(vld1q_u8_x2): Likewise.
(vld1q_s8_x2): Likewise.
(vld1q_u16_x2): Likewise.
(vld1q_s16_x2): Likewise.
(vld1q_u32_x2): Likewise.
(vld1q_s32_x2): Likewise.
(vld1q_u64_x2): Likewise.
(vld1q_s64_x2): Likewise.
(vld1q_f16_x2): Likewise.
(vld1q_f32_x2): Likewise.
(vld1q_f64_x2): Likewise.
(vld1q_p8_x2): Likewise.
(vld1q_p16_x2): Likewise.
(vld1q_p64_x2): Likewise.

gcc/testsuite/ChangeLog:

2017-11-06  Kugan Vivekanandarajah 

* gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: New test.


+__extension__ extern __inline int8x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s8_x2 (const uint8_t *__a)

This should be "const int8_t *"

 +{
+  int8x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
+  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
+  return ret;
+}

...

+__extension__ extern __inline int32x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s32_x2 (const uint32_t *__a)

Likewise, this should be "const int32_t *"

+{
+  int32x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
+  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
+  return ret;
+}
+




Re: [AARCH64] implements neon vld1_*_x2 intrinsics

2017-11-14 Thread Kugan Vivekanandarajah
Ping?

Thanks,
Kugan

On 7 November 2017 at 15:10, Kugan Vivekanandarajah
 wrote:
> Hi,
>
> Attached patch implements the  vld1_*_x2 intrinsics as defined by the
> neon document.
>
> Bootstrap for the latest patch is ongoing on aarch64-linux-gnu. Is
> this OK for trunk if no regressions?
>
> Thanks,
> Kugan
>
> gcc/ChangeLog:
>
> 2017-11-06  Kugan Vivekanandarajah  
>
> * config/aarch64/aarch64-simd.md (aarch64_ld1x2): New.
> (aarch64_ld1x2): Likewise.
> (aarch64_simd_ld1_x2): Likewise.
> (aarch64_simd_ld1_x2): Likewise.
> * config/aarch64/arm_neon.h (vld1_u8_x2): New.
> (vld1_s8_x2): Likewise.
> (vld1_u16_x2): Likewise.
> (vld1_s16_x2): Likewise.
> (vld1_u32_x2): Likewise.
> (vld1_s32_x2): Likewise.
> (vld1_u64_x2): Likewise.
> (vld1_s64_x2): Likewise.
> (vld1_f16_x2): Likewise.
> (vld1_f32_x2): Likewise.
> (vld1_f64_x2): Likewise.
> (vld1_p8_x2): Likewise.
> (vld1_p16_x2): Likewise.
> (vld1_p64_x2): Likewise.
> (vld1q_u8_x2): Likewise.
> (vld1q_s8_x2): Likewise.
> (vld1q_u16_x2): Likewise.
> (vld1q_s16_x2): Likewise.
> (vld1q_u32_x2): Likewise.
> (vld1q_s32_x2): Likewise.
> (vld1q_u64_x2): Likewise.
> (vld1q_s64_x2): Likewise.
> (vld1q_f16_x2): Likewise.
> (vld1q_f32_x2): Likewise.
> (vld1q_f64_x2): Likewise.
> (vld1q_p8_x2): Likewise.
> (vld1q_p16_x2): Likewise.
> (vld1q_p64_x2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> 2017-11-06  Kugan Vivekanandarajah  
>
> * gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: New test.


[AARCH64] implements neon vld1_*_x2 intrinsics

2017-11-06 Thread Kugan Vivekanandarajah
Hi,

Attached patch implements the  vld1_*_x2 intrinsics as defined by the
neon document.

Bootstrap for the latest patch is ongoing on aarch64-linux-gnu. Is
this OK for trunk if no regressions?

Thanks,
Kugan

gcc/ChangeLog:

2017-11-06  Kugan Vivekanandarajah  

* config/aarch64/aarch64-simd.md (aarch64_ld1x2): New.
(aarch64_ld1x2): Likewise.
(aarch64_simd_ld1_x2): Likewise.
(aarch64_simd_ld1_x2): Likewise.
* config/aarch64/arm_neon.h (vld1_u8_x2): New.
(vld1_s8_x2): Likewise.
(vld1_u16_x2): Likewise.
(vld1_s16_x2): Likewise.
(vld1_u32_x2): Likewise.
(vld1_s32_x2): Likewise.
(vld1_u64_x2): Likewise.
(vld1_s64_x2): Likewise.
(vld1_f16_x2): Likewise.
(vld1_f32_x2): Likewise.
(vld1_f64_x2): Likewise.
(vld1_p8_x2): Likewise.
(vld1_p16_x2): Likewise.
(vld1_p64_x2): Likewise.
(vld1q_u8_x2): Likewise.
(vld1q_s8_x2): Likewise.
(vld1q_u16_x2): Likewise.
(vld1q_s16_x2): Likewise.
(vld1q_u32_x2): Likewise.
(vld1q_s32_x2): Likewise.
(vld1q_u64_x2): Likewise.
(vld1q_s64_x2): Likewise.
(vld1q_f16_x2): Likewise.
(vld1q_f32_x2): Likewise.
(vld1q_f64_x2): Likewise.
(vld1q_p8_x2): Likewise.
(vld1q_p16_x2): Likewise.
(vld1q_p64_x2): Likewise.

gcc/testsuite/ChangeLog:

2017-11-06  Kugan Vivekanandarajah  

* gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: New test.
From dfdd8eba9fb49a776cdf8d82c0e34db0fb30d1b5 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah 
Date: Sat, 30 Sep 2017 04:51:08 +1000
Subject: [PATCH] add missing ld1 x2 builtins

---
 gcc/config/aarch64/aarch64-simd-builtins.def   |   6 +-
 gcc/config/aarch64/aarch64-simd.md |  48 +++
 gcc/config/aarch64/arm_neon.h  | 336 +
 .../gcc.target/aarch64/advsimd-intrinsics/vld1x2.c |  71 +
 4 files changed, 460 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index d713d5d..90736ba 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -86,6 +86,10 @@
   VAR1 (SETREGP, set_qregoi, 0, v2di)
   VAR1 (SETREGP, set_qregci, 0, v2di)
   VAR1 (SETREGP, set_qregxi, 0, v2di)
+  /* Implemented by aarch64_ld1x2. */
+  BUILTIN_VQ (LOADSTRUCT, ld1x2, 0)
+  /* Implemented by aarch64_ld1x2. */
+  BUILTIN_VDC (LOADSTRUCT, ld1x2, 0)
   /* Implemented by aarch64_ld.  */
   BUILTIN_VDC (LOADSTRUCT, ld2, 0)
   BUILTIN_VDC (LOADSTRUCT, ld3, 0)
@@ -563,4 +567,4 @@
   BUILTIN_GPI (UNOP, fix_truncdf, 2)
   BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
   BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
-  BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
\ No newline at end of file
+  BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 70e9339..a7ed594 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5071,6 +5071,33 @@
   DONE;
 })
 
+(define_expand "aarch64_ld1x2"
+ [(match_operand:OI 0 "register_operand" "=w")
+  (match_operand:DI 1 "register_operand" "r")
+  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  machine_mode mode = OImode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  emit_insn (gen_aarch64_simd_ld1_x2 (operands[0], mem));
+  DONE;
+})
+
+(define_expand "aarch64_ld1x2"
+ [(match_operand:OI 0 "register_operand" "=w")
+  (match_operand:DI 1 "register_operand" "r")
+  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  machine_mode mode = OImode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  emit_insn (gen_aarch64_simd_ld1_x2 (operands[0], mem));
+  DONE;
+})
+
+
 (define_expand "aarch64_ld_lane"
   [(match_operand:VSTRUCT 0 "register_operand" "=w")
 	(match_operand:DI 1 "register_operand" "w")
@@ -5458,6 +5485,27 @@
   [(set_attr "type" "neon_load1_all_lanes")]
 )
 
+(define_insn "aarch64_simd_ld1_x2"
+  [(set (match_operand:OI 0 "register_operand" "=w")
+	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
+		(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_LD1))]
+  "TARGET_SIMD"
+  "ld1\\t{%S0. - %T0.}, %1"
+  [(set_attr "type" "neon_load1_2reg")]
+)
+
+(define_insn "aarch64_simd_ld1_x2"
+  [(set (match_operand:OI 0 "register_operand" "=w")
+	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
+		(unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_LD1))]
+  "TARGET_SIMD"
+  "ld1\\t{%S0. - %T0.}, %1"
+  [(set_attr "type" "neon_load1_2reg")]
+)
+
+
 (define_insn "aarch64_frecpe"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h