[PATCH] D75254: [ARM,MVE] Add ACLE intrinsics for VCVT.F32.F16 family.

Simon Tatham via Phabricator via cfe-commits Mon, 02 Mar 2020 02:36:11 -0800

This revision was automatically updated to reflect the committed changes.
Closed by commit rGb08d2ddd69b4: [ARM,MVE] Add ACLE intrinsics for VCVT.F32.F16 family. (authored by simon_tatham).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75254/new/

https://reviews.llvm.org/D75254

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt.ll
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt.ll
@@ -6,6 +6,8 @@
 
 declare <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half>, <4 x float>, i32)
 declare <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half>, <4 x float>, i32, <4 x i1>)
+declare <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half>, i32)
+declare <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float>, <8 x half>, i32, <4 x i1>)
 
 declare <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32, <8 x i16>, i32)
 declare <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32, <4 x i32>, i32)
@@ -367,3 +369,51 @@
   %2 = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> undef, <4 x float> %a, i32 32, <4 x i1> %1)
   ret <4 x i32> %2
 }
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvtbq_f32_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vcvtbq_f32_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcvtb.f32.f16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> %a, i32 0)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvttq_f32_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vcvttq_f32_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcvtt.f32.f16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> %a, i32 1)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvtbq_m_f32_f16(<4 x float> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtbq_m_f32_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vcvtbt.f32.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> %inactive, <8 x half> %a, i32 0, <4 x i1> %1)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvttq_m_f32_f16(<4 x float> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvttq_m_f32_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vcvttt.f32.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> %inactive, <8 x half> %a, i32 1, <4 x i1> %1)
+  ret <4 x float> %2
+}
Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===================================================================
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -4515,6 +4515,17 @@
 
 multiclass MVE_VCVT_h2f_m<string iname, int half> {
   def "": MVE_VCVT_ff<iname, "f32.f16", 0b1, half, (ins), vpred_r, "">;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEFloat] in {
+    def : Pat<(v4f32 (int_arm_mve_vcvt_widen (v8f16 MQPR:$Qm), (i32 half))),
+              (v4f32 (Inst (v8f16 MQPR:$Qm)))>;
+    def : Pat<(v4f32 (int_arm_mve_vcvt_widen_predicated
+                         (v4f32 MQPR:$inactive), (v8f16 MQPR:$Qm), (i32 half),
+                         (v4i1 VCCR:$mask))),
+              (v4f32 (Inst (v8f16 MQPR:$Qm), ARMVCCThen,
+                           (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive)))>;
+  }
 }
 
 defm MVE_VCVTf16f32bh : MVE_VCVT_f2h_m<"vcvtb", 0b0>;
Index: llvm/include/llvm/IR/IntrinsicsARM.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsARM.td
+++ llvm/include/llvm/IR/IntrinsicsARM.td
@@ -911,8 +911,22 @@
           LLVMMatchType<0>, rets[0])], props>;
 }
 
+// Intrinsic with a predicated and a non-predicated case. The predicated case
+// has two additional parameters: inactive (the value for inactive lanes, can
+// be undef) and predicate.
+multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags,
+                           list<LLVMType> params, LLVMType inactive,
+                           LLVMType predicate,
+                           list<IntrinsicProperty> props = [IntrNoMem]> {
+  def "":          Intrinsic<rets, flags # params, props>;
+  def _predicated: Intrinsic<rets, flags # [inactive] # params # [predicate],
+                             props>;
+}
+
 defm int_arm_mve_vcvt_narrow: MVEPredicated<[llvm_v8f16_ty],
    [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty>;
+defm int_arm_mve_vcvt_widen: MVEMXPredicated<[llvm_v4f32_ty], [],
+   [llvm_v8f16_ty, llvm_i32_ty], llvm_v4f32_ty, llvm_v4i1_ty>;
 
 defm int_arm_mve_vldr_gather_base: MVEPredicated<
    [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
@@ -1044,18 +1058,6 @@
    [llvm_anyvector_ty],
    [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrNoMem]>;
 
-// Intrinsic with a predicated and a non-predicated case. The predicated case
-// has two additional parameters: inactive (the value for inactive lanes, can
-// be undef) and predicate.
-multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags,
-                           list<LLVMType> params, LLVMType inactive,
-                           LLVMType predicate,
-                           list<IntrinsicProperty> props = [IntrNoMem]> {
-  def "":          Intrinsic<rets, flags # params, props>;
-  def _predicated: Intrinsic<rets, flags # [inactive] # params # [predicate],
-                             props>;
-}
-
 // The first two parameters are compile-time constants:
 // * Halving: 0 means  halving (vhcaddq), 1 means non-halving (vcaddq) 
 //            instruction. Note: the flag is inverted to match the corresonding
Index: clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
+++ clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -697,3 +697,71 @@
 {
     return vcvtq_x_n_u32_f32(a, 32, p);
 }
+
+// CHECK-LABEL: @test_vcvtbq_f32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> [[A:%.*]], i32 0)
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vcvtbq_f32_f16(float16x8_t a)
+{
+    return vcvtbq_f32_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvttq_f32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> [[A:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vcvttq_f32_f16(float16x8_t a)
+{
+    return vcvttq_f32_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvtbq_m_f32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtbq_m_f32_f16(float32x4_t inactive, float16x8_t a, mve_pred16_t p)
+{
+    return vcvtbq_m_f32_f16(inactive, a, p);
+}
+
+// CHECK-LABEL: @test_vcvttq_m_f32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvttq_m_f32_f16(float32x4_t inactive, float16x8_t a, mve_pred16_t p)
+{
+    return vcvttq_m_f32_f16(inactive, a, p);
+}
+
+// CHECK-LABEL: @test_vcvtbq_x_f32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> undef, <8 x half> [[A:%.*]], i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtbq_x_f32_f16(float16x8_t a, mve_pred16_t p)
+{
+    return vcvtbq_x_f32_f16(a, p);
+}
+
+// CHECK-LABEL: @test_vcvttq_x_f32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> undef, <8 x half> [[A:%.*]], i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvttq_x_f32_f16(float16x8_t a, mve_pred16_t p)
+{
+    return vcvttq_x_f32_f16(a, p);
+}
Index: clang/include/clang/Basic/arm_mve.td
===================================================================
--- clang/include/clang/Basic/arm_mve.td
+++ clang/include/clang/Basic/arm_mve.td
@@ -453,6 +453,15 @@
       VecOf<f16>, (args VecOf<f16>:$inactive, Vector:$a, PredOf<f32>:$pred),
       (IRInt<"vcvt_narrow_predicated"> $inactive, $a, halfconst, $pred)>;
   } // params = [f32], pnt = PNT_None
+
+  let params = [f16], pnt = PNT_None in {
+    def vcvt#half#q_f32: Intrinsic<VecOf<f32>, (args Vector:$a),
+      (IRInt<"vcvt_widen"> $a, halfconst)>;
+    defm vcvt#half#q: IntrinsicMX<
+      VecOf<f32>, (args Vector:$a, PredOf<f32>:$pred),
+      (IRInt<"vcvt_widen_predicated"> $inactive, $a, halfconst, $pred),
+      1, "_f32">;
+  } // params = [f16], pnt = PNT_None
 } // loop over half = "b", "t"
 
 multiclass float_int_conversions<Type FScalar, Type IScalar, IRBuilderBase ftoi, IRBuilderBase itof> {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75254: [ARM,MVE] Add ACLE intrinsics for VCVT.F32.F16 family.

Reply via email to