[PATCH] D82908: [SVE] ACLE: Fix builtins for svdup_lane_bf16 and svcvtnt_bf16_f32_x

Sander de Smalen via Phabricator via cfe-commits Tue, 30 Jun 2020 13:37:12 -0700

sdesmalen created this revision.
sdesmalen added a reviewer: fpetrogalli.
Herald added subscribers: cfe-commits, psnobl, rkruppe, tschuett.
Herald added a reviewer: efriedma.
Herald added a project: clang.


bfloat16 variants of svdup_lane were missing, and svcvtnt_bf16_x
was implemented incorrectly (it takes an operand for the inactive
lanes)


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82908

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
  clang/utils/TableGen/SveEmitter.cpp


Index: clang/utils/TableGen/SveEmitter.cpp
===================================================================
--- clang/utils/TableGen/SveEmitter.cpp
+++ clang/utils/TableGen/SveEmitter.cpp
@@ -1262,6 +1262,11 @@
   if (!InGuard.empty())
     OS << "#endif  //" << InGuard << "\n";
 
+  OS << "#if defined(__ARM_FEATURE_SVE_BF16)\n";
+  OS << "#define svcvtnt_bf16_x      svcvtnt_bf16_m\n";
+  OS << "#define svcvtnt_bf16_f32_x  svcvtnt_bf16_f32_m\n";
+  OS << "#endif /*__ARM_FEATURE_SVE_BF16 */\n\n";
+
   OS << "#if defined(__ARM_FEATURE_SVE2)\n";
   OS << "#define svcvtnt_f16_x      svcvtnt_f16_m\n";
   OS << "#define svcvtnt_f16_f32_x  svcvtnt_f16_f32_m\n";
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
@@ -51,3 +51,13 @@
   // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_x'}}
   return SVE_ACLE_FUNC(svdup, _n, _bf16_x, )(pg, op);
 }
+
+svbfloat16_t test_svdup_lane_bf16(svbfloat16_t data, uint16_t index)
+{
+  // CHECK-LABEL: test_svdup_lane_bf16
+  // CHECK: %[[DUP:.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.x.nxv8i16(i16 %index)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i16> 
%[[DUP]])
+  // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svdup_lane_bf16'}}
+  return SVE_ACLE_FUNC(svdup_lane,_bf16,,)(data, index);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
@@ -10,18 +10,18 @@
 #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
 #endif
 
-svbfloat16_t test_svcvtnt_bf16_f32_x(svbool_t pg, svfloat32_t op) {
+svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, 
svfloat32_t op) {
   // CHECK-LABEL: test_svcvtnt_bf16_f32_x
   // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
-  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> 
%[[PG]], <vscale x 4 x float> %op)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %even, <vscale x 8 x i1> 
%[[PG]], <vscale x 4 x float> %op)
   // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
-  return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _x, )(pg, op);
+  return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _x, )(even, pg, op);
 }
 
-svbfloat16_t test_svcvtnt_bf16_f32_m(svbfloat16_t inactive, svbool_t pg, 
svfloat32_t op) {
+svbfloat16_t test_svcvtnt_bf16_f32_m(svbfloat16_t even, svbool_t pg, 
svfloat32_t op) {
   // CHECK-LABEL: test_svcvtnt_bf16_f32_m
   // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
-  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %inactive, <vscale x 8 x 
i1> %[[PG]], <vscale x 4 x float> %op)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %even, <vscale x 8 x i1> 
%[[PG]], <vscale x 4 x float> %op)
   // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
-  return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _m, )(inactive, pg, op);
+  return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _m, )(even, pg, op);
 }
Index: clang/include/clang/Basic/arm_sve.td
===================================================================
--- clang/include/clang/Basic/arm_sve.td
+++ clang/include/clang/Basic/arm_sve.td
@@ -1110,7 +1110,7 @@
 
 let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
   defm SVCVT_BF16_F32   : SInstCvtMXZ<"svcvt_bf16[_f32]",  "ddPM", "dPM", "b", 
 "aarch64_sve_fcvt_bf16f32">;
-  defm SVCVTNT_BF16_F32 : SInstCvtMX<"svcvtnt_bf16[_f32]", "ddPM", "dPM", "b", 
 "aarch64_sve_fcvtnt_bf16f32">;
+  def SVCVTNT_BF16_F32 : SInst<"svcvtnt_bf16[_f32]", "ddPM", "b",  MergeOp1, 
"aarch64_sve_fcvtnt_bf16f32", [IsOverloadNone]>;
 }
 
 // svcvt_s##_f64
@@ -1204,6 +1204,11 @@
 // instruction such as DUP (indexed) if the lane index fits the range of the
 // instruction's immediate.
 def SVDUP_LANE   : SInst<"svdup_lane[_{d}]",  "ddL",  "csilUcUsUiUlhfd", 
MergeNone, "aarch64_sve_tbl">;
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+def SVDUP_LANE_BF16 :
+                   SInst<"svdup_lane[_{d}]",  "ddL",  "b",               
MergeNone, "aarch64_sve_tbl">;
+}
+
 def SVDUPQ_LANE  : SInst<"svdupq_lane[_{d}]", "ddn",  "csilUcUsUiUlhfd", 
MergeNone, "aarch64_sve_dupq_lane">;
 let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
   def SVDUPQ_LANE_BF16  : SInst<"svdupq_lane[_{d}]", "ddn",  "b", MergeNone, 
"aarch64_sve_dupq_lane">;

Index: clang/utils/TableGen/SveEmitter.cpp
===================================================================
--- clang/utils/TableGen/SveEmitter.cpp
+++ clang/utils/TableGen/SveEmitter.cpp
@@ -1262,6 +1262,11 @@
   if (!InGuard.empty())
     OS << "#endif  //" << InGuard << "\n";
 
+  OS << "#if defined(__ARM_FEATURE_SVE_BF16)\n";
+  OS << "#define svcvtnt_bf16_x      svcvtnt_bf16_m\n";
+  OS << "#define svcvtnt_bf16_f32_x  svcvtnt_bf16_f32_m\n";
+  OS << "#endif /*__ARM_FEATURE_SVE_BF16 */\n\n";
+
   OS << "#if defined(__ARM_FEATURE_SVE2)\n";
   OS << "#define svcvtnt_f16_x      svcvtnt_f16_m\n";
   OS << "#define svcvtnt_f16_f32_x  svcvtnt_f16_f32_m\n";
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
@@ -51,3 +51,13 @@
   // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_x'}}
   return SVE_ACLE_FUNC(svdup, _n, _bf16_x, )(pg, op);
 }
+
+svbfloat16_t test_svdup_lane_bf16(svbfloat16_t data, uint16_t index)
+{
+  // CHECK-LABEL: test_svdup_lane_bf16
+  // CHECK: %[[DUP:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %index)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i16> %[[DUP]])
+  // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svdup_lane_bf16'}}
+  return SVE_ACLE_FUNC(svdup_lane,_bf16,,)(data, index);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
@@ -10,18 +10,18 @@
 #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
 #endif
 
-svbfloat16_t test_svcvtnt_bf16_f32_x(svbool_t pg, svfloat32_t op) {
+svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t op) {
   // CHECK-LABEL: test_svcvtnt_bf16_f32_x
   // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
-  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %[[PG]], <vscale x 4 x float> %op)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %even, <vscale x 8 x i1> %[[PG]], <vscale x 4 x float> %op)
   // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
-  return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _x, )(pg, op);
+  return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _x, )(even, pg, op);
 }
 
-svbfloat16_t test_svcvtnt_bf16_f32_m(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) {
+svbfloat16_t test_svcvtnt_bf16_f32_m(svbfloat16_t even, svbool_t pg, svfloat32_t op) {
   // CHECK-LABEL: test_svcvtnt_bf16_f32_m
   // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
-  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %[[PG]], <vscale x 4 x float> %op)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %even, <vscale x 8 x i1> %[[PG]], <vscale x 4 x float> %op)
   // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
-  return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _m, )(inactive, pg, op);
+  return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _m, )(even, pg, op);
 }
Index: clang/include/clang/Basic/arm_sve.td
===================================================================
--- clang/include/clang/Basic/arm_sve.td
+++ clang/include/clang/Basic/arm_sve.td
@@ -1110,7 +1110,7 @@
 
 let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
   defm SVCVT_BF16_F32   : SInstCvtMXZ<"svcvt_bf16[_f32]",  "ddPM", "dPM", "b",  "aarch64_sve_fcvt_bf16f32">;
-  defm SVCVTNT_BF16_F32 : SInstCvtMX<"svcvtnt_bf16[_f32]", "ddPM", "dPM", "b",  "aarch64_sve_fcvtnt_bf16f32">;
+  def SVCVTNT_BF16_F32 : SInst<"svcvtnt_bf16[_f32]", "ddPM", "b",  MergeOp1, "aarch64_sve_fcvtnt_bf16f32", [IsOverloadNone]>;
 }
 
 // svcvt_s##_f64
@@ -1204,6 +1204,11 @@
 // instruction such as DUP (indexed) if the lane index fits the range of the
 // instruction's immediate.
 def SVDUP_LANE   : SInst<"svdup_lane[_{d}]",  "ddL",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">;
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+def SVDUP_LANE_BF16 :
+                   SInst<"svdup_lane[_{d}]",  "ddL",  "b",               MergeNone, "aarch64_sve_tbl">;
+}
+
 def SVDUPQ_LANE  : SInst<"svdupq_lane[_{d}]", "ddn",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane">;
 let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
   def SVDUPQ_LANE_BF16  : SInst<"svdupq_lane[_{d}]", "ddn",  "b", MergeNone, "aarch64_sve_dupq_lane">;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D82908: [SVE] ACLE: Fix builtins for svdup_lane_bf16 and svcvtnt_bf16_f32_x

Reply via email to