Re: [backport gcc-10][AArch64] ACLE bf16 convert

2020-12-11 Thread Dennis Zhang via Gcc-patches
> 
> From: Kyrylo Tkachov 
> Sent: Friday, December 11, 2020 11:23 AM
> To: Dennis Zhang; gcc-patches@gcc.gnu.org
> Cc: nd; Richard Earnshaw; Marcus Shawcroft; Richard Sandiford
> Subject: RE: [backport gcc-10][AArch64] ACLE bf16 convert
> 
> > -Original Message-
> > From: Dennis Zhang 
> > Sent: 10 December 2020 14:27
> > To: gcc-patches@gcc.gnu.org
> > Cc: nd ; Richard Earnshaw ;
> > Marcus Shawcroft ; Kyrylo Tkachov
> > ; Richard Sandiford
> > 
> > Subject: [backport gcc-10][AArch64] ACLE bf16 convert
> >
> > Hi all,
> >
> > This patch backports the commit
> > f7d6961126a7f06c8089d8a58bd21be43bc16806.
> > The original is approved at https://gcc.gnu.org/pipermail/gcc-patches/2020-
> > November/557859.html
> > The only change is to remove FPCR-reading flags for builtin definition since
> > it's not supported in gcc-10.
> > Regtested and bootstrapped for aarch64-none-linux-gnu.
> >
> > Is it OK to backport?
> 
> Ok.
> Thanks,
> Kyrill

Thanks Kyrill!
The patch is committed as 702e45ee471422dee86d32fc84f617d341d33175.

Bests
Dennis


RE: [backport gcc-10][AArch64] ACLE bf16 convert

2020-12-11 Thread Kyrylo Tkachov via Gcc-patches



> -Original Message-
> From: Dennis Zhang 
> Sent: 10 December 2020 14:27
> To: gcc-patches@gcc.gnu.org
> Cc: nd ; Richard Earnshaw ;
> Marcus Shawcroft ; Kyrylo Tkachov
> ; Richard Sandiford
> 
> Subject: [backport gcc-10][AArch64] ACLE bf16 convert
> 
> Hi all,
> 
> This patch backports the commit
> f7d6961126a7f06c8089d8a58bd21be43bc16806.
> The original is approved at https://gcc.gnu.org/pipermail/gcc-patches/2020-
> November/557859.html
> The only change is to remove FPCR-reading flags for builtin definition since
> it's not supported in gcc-10.
> Regtested and bootstrapped for aarch64-none-linux-gnu.
> 
> Is it OK to backport?

Ok.
Thanks,
Kyrill

> 
> Cheers
> Dennis


[backport gcc-10][AArch64] ACLE bf16 convert

2020-12-10 Thread Dennis Zhang via Gcc-patches
Hi all,

This patch backports the commit f7d6961126a7f06c8089d8a58bd21be43bc16806.
The original is approved at 
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/557859.html
The only change is to remove FPCR-reading flags for builtin definition since 
it's not supported in gcc-10.
Regtested and bootstrapped for aarch64-none-linux-gnu.

Is it OK to backport?

Cheers
Dennisdiff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index ba2bda26dcdd4947dc724851433451433d378724..7192f3954d311d89064707cfcb735efad4377c12 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -728,3 +728,8 @@
   VAR1 (UNOP, bfcvtn_q, 0, v8bf)
   VAR1 (BINOP, bfcvtn2, 0, v8bf)
   VAR1 (UNOP, bfcvt, 0, bf)
+
+  /* Implemented by aarch64_{v}bfcvt{_high}.  */
+  VAR2 (UNOP, vbfcvt, 0, v4bf, v8bf)
+  VAR1 (UNOP, vbfcvt_high, 0, v8bf)
+  VAR1 (UNOP, bfcvt, 0, sf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9f0e2bd1e6ff5246f84e919402c687687a84beb8..2e8aa668b107f039e4958b6998da180a6d11b881 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7238,3 +7238,31 @@
   "bfcvt\\t%h0, %s1"
   [(set_attr "type" "f_cvt")]
 )
+
+;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
+(define_insn "aarch64_vbfcvt"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
+		  UNSPEC_BFCVTN))]
+  "TARGET_BF16_SIMD"
+  "shll\\t%0.4s, %1.4h, #16"
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_vbfcvt_highv8bf"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
+		  UNSPEC_BFCVTN2))]
+  "TARGET_BF16_SIMD"
+  "shll2\\t%0.4s, %1.8h, #16"
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_bfcvtsf"
+  [(set (match_operand:SF 0 "register_operand" "=w")
+	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
+		UNSPEC_BFCVT))]
+  "TARGET_BF16_FP"
+  "shl\\t%d0, %d1, #16"
+  [(set_attr "type" "neon_shift_imm")]
+)
diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
index 984875dcc014300c489209c11abf41b1c47b7fbe..881615498d3d52662d7ebb3ab1e8d52d5a40cab8 100644
--- a/gcc/config/aarch64/arm_bf16.h
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -40,6 +40,13 @@ vcvth_bf16_f32 (float32_t __a)
   return __builtin_aarch64_bfcvtbf (__a);
 }
 
+__extension__ extern __inline float32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtah_f32_bf16 (bfloat16_t __a)
+{
+  return __builtin_aarch64_bfcvtsf (__a);
+}
+
 #pragma GCC pop_options
 
 #endif
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 95bfa5ebba21b739ee3c84e3971337646f8881d4..69cccd3278642814f3961c5bf52be5639f5ef3f3 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -35680,6 +35680,27 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
   return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
 }
 
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvt_f32_bf16 (bfloat16x4_t __a)
+{
+  return __builtin_aarch64_vbfcvtv4bf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_low_f32_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vbfcvtv8bf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_high_f32_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vbfcvt_highv8bf (__a);
+}
+
 __extension__ extern __inline bfloat16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvt_bf16_f32 (float32x4_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
index bbea630b1820d578bdf1619834f29b919f5c3f32..47af7c494d9b9d1f4b63e802efc293348a40e270 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
@@ -46,3 +46,43 @@ bfloat16_t test_bfcvt (float32_t a)
 {
   return vcvth_bf16_f32 (a);
 }
+
+/*
+**test_vcvt_f32_bf16:
+** shll	v0.4s, v0.4h, #16
+** ret
+*/
+float32x4_t test_vcvt_f32_bf16 (bfloat16x4_t a)
+{
+  return vcvt_f32_bf16 (a);
+}
+
+/*
+**test_vcvtq_low_f32_bf16:
+** shll	v0.4s, v0.4h, #16
+** ret
+*/
+float32x4_t test_vcvtq_low_f32_bf16 (bfloat16x8_t a)
+{
+  return vcvtq_low_f32_bf16 (a);
+}
+
+/*
+**test_vcvtq_high_f32_bf16:
+** shll2	v0.4s, v0.8h, #16
+** ret
+*/
+float32x4_t test_vcvtq_high_f32_bf16 (bfloat16x8_t a)
+{
+  return vcvtq_high_f32_bf16 (a);
+}
+
+/*
+**test_vcvtah_f32_bf16:
+** shl	d0, d0,