[PATCH] D82501: [sve][acle] Add reinterpret intrinsics for brain float.

Francesco Petrogalli via Phabricator via cfe-commits Thu, 25 Jun 2020 09:11:26 -0700

fpetrogalli updated this revision to Diff 273399.
fpetrogalli marked an inline comment as done.
fpetrogalli added a comment.


@david-arm, in the end I decided to add the `ASM-NOT` test; it was easy and 
came for free.

Also, I have moved the IR tests into the file with all the other bitcasts, using a 
function attribute to enable the bf16 feature only for those functions that deal 
with bfloats.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82501/new/

https://reviews.llvm.org/D82501

Files:
  clang/utils/TableGen/SveEmitter.cpp
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve-bitcast-bfloat.ll
  llvm/test/CodeGen/AArch64/sve-bitcast.ll

Index: llvm/test/CodeGen/AArch64/sve-bitcast.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-bitcast.ll
+++ llvm/test/CodeGen/AArch64/sve-bitcast.ll
@@ -340,3 +340,118 @@
   %bc = bitcast <vscale x 4 x float> %v to <vscale x 2 x double>
   ret <vscale x 2 x double> %bc
 }
+
+define <vscale x 16 x i8> @bitcast_bfloat_to_i8(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_bfloat_to_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %bc
+}
+
+define <vscale x 8 x i16> @bitcast_bfloat_to_i16(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_bfloat_to_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %bc
+}
+
+define <vscale x 4 x i32> @bitcast_bfloat_to_i32(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_bfloat_to_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %bc
+}
+
+define <vscale x 2 x i64> @bitcast_bfloat_to_i64(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_bfloat_to_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %bc
+}
+
+define <vscale x 8 x half> @bitcast_bfloat_to_half(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_bfloat_to_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 8 x half>
+  ret <vscale x 8 x half> %bc
+}
+
+define <vscale x 4 x float> @bitcast_bfloat_to_float(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_bfloat_to_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 4 x float>
+  ret <vscale x 4 x float> %bc
+}
+
+define <vscale x 2 x double> @bitcast_bfloat_to_double(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_bfloat_to_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 2 x double>
+  ret <vscale x 2 x double> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_i8_to_bfloat(<vscale x 16 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_i8_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 16 x i8> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_i16_to_bfloat(<vscale x 8 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_i16_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x i16> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_i32_to_bfloat(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_i32_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x i32> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_i64_to_bfloat(<vscale x 2 x i64> %v) #0 {
+; CHECK-LABEL: bitcast_i64_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x i64> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_half_to_bfloat(<vscale x 8 x half> %v) #0 {
+; CHECK-LABEL: bitcast_half_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x half> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_float_to_bfloat(<vscale x 4 x float> %v) #0 {
+; CHECK-LABEL: bitcast_float_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x float> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_double_to_bfloat(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: bitcast_double_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x double> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
Index: llvm/test/CodeGen/AArch64/sve-bitcast-bfloat.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-bitcast-bfloat.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s 2>%t | FileCheck %s
+; RUN: not --crash llc -mtriple=aarch64_be -mattr=+sve,+bf16 < %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; WARN-NOT: warning
+
+define <vscale x 16 x i8> @bitcast_bfloat_to_i8(<vscale x 8 x bfloat> %v) {
+; CHECK-LABEL: bitcast_bfloat_to_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %bc
+}
+
+define <vscale x 8 x i16> @bitcast_bfloat_to_i16(<vscale x 8 x bfloat> %v) {
+; CHECK-LABEL: bitcast_bfloat_to_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %bc
+}
+
+define <vscale x 4 x i32> @bitcast_bfloat_to_i32(<vscale x 8 x bfloat> %v) {
+; CHECK-LABEL: bitcast_bfloat_to_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %bc
+}
+
+define <vscale x 2 x i64> @bitcast_bfloat_to_i64(<vscale x 8 x bfloat> %v) {
+; CHECK-LABEL: bitcast_bfloat_to_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %bc
+}
+
+define <vscale x 8 x half> @bitcast_bfloat_to_half(<vscale x 8 x bfloat> %v) {
+; CHECK-LABEL: bitcast_bfloat_to_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 8 x half>
+  ret <vscale x 8 x half> %bc
+}
+
+define <vscale x 4 x float> @bitcast_bfloat_to_float(<vscale x 8 x bfloat> %v) {
+; CHECK-LABEL: bitcast_bfloat_to_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 4 x float>
+  ret <vscale x 4 x float> %bc
+}
+
+define <vscale x 2 x double> @bitcast_bfloat_to_double(<vscale x 8 x bfloat> %v) {
+; CHECK-LABEL: bitcast_bfloat_to_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 2 x double>
+  ret <vscale x 2 x double> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_i8_to_bfloat(<vscale x 16 x i8> %v) {
+; CHECK-LABEL: bitcast_i8_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 16 x i8> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_i16_to_bfloat(<vscale x 8 x i16> %v) {
+; CHECK-LABEL: bitcast_i16_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x i16> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_i32_to_bfloat(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: bitcast_i32_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x i32> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_i64_to_bfloat(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: bitcast_i64_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x i64> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_half_to_bfloat(<vscale x 8 x half> %v) {
+; CHECK-LABEL: bitcast_half_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x half> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_float_to_bfloat(<vscale x 4 x float> %v) {
+; CHECK-LABEL: bitcast_float_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x float> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
+define <vscale x 8 x bfloat> @bitcast_double_to_bfloat(<vscale x 2 x double> %v) {
+; CHECK-LABEL: bitcast_double_to_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x double> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
+}
+
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1435,7 +1435,6 @@
 
     def : Pat<(nxv8f16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8f16 ZPR:$src)>;
     def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>;
-    def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
     def : Pat<(nxv8f16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8f16 ZPR:$src)>;
     def : Pat<(nxv8f16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8f16 ZPR:$src)>;
     def : Pat<(nxv8f16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8f16 ZPR:$src)>;
@@ -1456,6 +1455,24 @@
     def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
   }
 
+  let Predicates = [IsLE, HasSVE, HasBF16] in {
+    def : Pat<(nxv8bf16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+    def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+    def : Pat<(nxv8bf16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+    def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+    def : Pat<(nxv8bf16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+    def : Pat<(nxv8bf16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+    def : Pat<(nxv8bf16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+
+    def : Pat<(nxv16i8 (bitconvert (nxv8bf16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+    def : Pat<(nxv8i16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+    def : Pat<(nxv4i32 (bitconvert (nxv8bf16 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+    def : Pat<(nxv2i64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+    def : Pat<(nxv8f16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+    def : Pat<(nxv4f32 (bitconvert (nxv8bf16 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+    def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+  }
+
   def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
   def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
   def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
Index: clang/utils/TableGen/SveEmitter.cpp
===================================================================
--- clang/utils/TableGen/SveEmitter.cpp
+++ clang/utils/TableGen/SveEmitter.cpp
@@ -248,13 +248,13 @@
     const char *Type;
     const char *BuiltinType;
   };
-  SmallVector<ReinterpretTypeInfo, 11> Reinterprets = {
+  SmallVector<ReinterpretTypeInfo, 12> Reinterprets = {
       {"s8", "svint8_t", "q16Sc"},   {"s16", "svint16_t", "q8Ss"},
       {"s32", "svint32_t", "q4Si"},  {"s64", "svint64_t", "q2SWi"},
       {"u8", "svuint8_t", "q16Uc"},  {"u16", "svuint16_t", "q8Us"},
       {"u32", "svuint32_t", "q4Ui"}, {"u64", "svuint64_t", "q2UWi"},
-      {"f16", "svfloat16_t", "q8h"}, {"f32", "svfloat32_t", "q4f"},
-      {"f64", "svfloat64_t", "q2d"}};
+      {"f16", "svfloat16_t", "q8h"}, {"bf16", "svbfloat16_t", "q8y"},
+      {"f32", "svfloat32_t", "q4f"}, {"f64", "svfloat64_t", "q2d"}};
 
   RecordKeeper &Records;
   llvm::StringMap<uint64_t> EltTypes;
@@ -1208,6 +1208,10 @@
   for (auto ShortForm : { false, true } )
     for (const ReinterpretTypeInfo &From : Reinterprets)
       for (const ReinterpretTypeInfo &To : Reinterprets) {
+        const bool IsBFloat = StringRef(From.Suffix).equals("bf16") ||
+                              StringRef(To.Suffix).equals("bf16");
+        if (IsBFloat)
+          OS << "#if defined(__ARM_FEATURE_SVE_BF16)\n";
         if (ShortForm) {
           OS << "__aio " << From.Type << " svreinterpret_" << From.Suffix;
           OS << "(" << To.Type << " op) {\n";
@@ -1218,6 +1222,8 @@
           OS << "#define svreinterpret_" << From.Suffix << "_" << To.Suffix
              << "(...) __builtin_sve_reinterpret_" << From.Suffix << "_"
              << To.Suffix << "(__VA_ARGS__)\n";
+        if (IsBFloat)
+          OS << "#endif /* #if defined(__ARM_FEATURE_SVE_BF16) */\n";
       }
 
   SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D82501: [sve][acle] Add reinterpret intrinsics for brain float.

Reply via email to