az created this revision.
az added a reviewer: SjoerdMeijer.
Herald added a reviewer: javed.absar.
Herald added subscribers: cfe-commits, chrib, kristof.beyls.

This patch adds the fp16 scalar intrinsics for ARM, as described in the ARM ACLE
document. Only the frontend work is done here; some work is still needed in the
backend codegen.


Repository:
  rC Clang

https://reviews.llvm.org/D49941

Files:
  clang/include/clang/Basic/arm_fp16.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/arm-v8.2a-fp16-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsARM.td

Index: llvm/include/llvm/IR/IntrinsicsARM.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsARM.td
+++ llvm/include/llvm/IR/IntrinsicsARM.td
@@ -355,6 +355,9 @@
 class Neon_2Arg_Intrinsic
   : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
               [IntrNoMem]>;
+class Float_2Arg_Intrinsic
+  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+              [IntrNoMem]>;
 class Neon_2Arg_Narrow_Intrinsic
   : Intrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>, LLVMExtendedType<0>],
               [IntrNoMem]>;
@@ -377,8 +380,8 @@
   : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
 class Neon_CvtFPToFx_Intrinsic
   : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
-class Neon_CvtFPtoInt_1Arg_Intrinsic
-  : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+class CvtFPtoInt_1Arg_Intrinsic
+  : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
 
 class Neon_Compare_Intrinsic
   : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
@@ -431,12 +434,12 @@
   // Vector Maximum.
   def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic;
   def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic;
-  def int_arm_neon_vmaxnm : Neon_2Arg_Intrinsic;
+  def int_arm_neon_vmaxnm : Float_2Arg_Intrinsic;
 
   // Vector Minimum.
   def int_arm_neon_vmins : Neon_2Arg_Intrinsic;
   def int_arm_neon_vminu : Neon_2Arg_Intrinsic;
-  def int_arm_neon_vminnm : Neon_2Arg_Intrinsic;
+  def int_arm_neon_vminnm : Float_2Arg_Intrinsic;
 
   // Vector Reciprocal Step.
   def int_arm_neon_vrecps : Neon_2Arg_Intrinsic;
@@ -552,15 +555,15 @@
 // Vector Reciprocal Square Root Estimate.
 def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic;
 
-// Vector Conversions Between Floating-point and Integer
-def int_arm_neon_vcvtau : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtas : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtnu : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtns : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtpu : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtps : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtmu : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtms : Neon_CvtFPtoInt_1Arg_Intrinsic;
+// Conversions Between Floating-point and Integer
+def int_arm_neon_vcvtau : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtas : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtnu : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtns : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtpu : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtps : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtmu : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtms : CvtFPtoInt_1Arg_Intrinsic;
 
 // Vector Conversions Between Floating-point and Fixed-point.
 def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic;
Index: clang/test/CodeGen/arm-v8.2a-fp16-intrinsics.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/arm-v8.2a-fp16-intrinsics.c
@@ -0,0 +1,225 @@
+// RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature +fullfp16 \
+// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: | opt -S -mem2reg \
+// RUN: | FileCheck %s
+
+// REQUIRES: arm-registered-target
+
+#include <arm_fp16.h>
+
+// CHECK-LABEL: test_vabsh_f16
+// CHECK:  [[ABS:%.*]] =  call half @llvm.fabs.f16(half %{{.*}})
+float16_t test_vabsh_f16(float16_t a) {
+  return vabsh_f16(a);
+}
+
+// CHECK-LABEL: test_vcvth_f16_s32
+// CHECK:  [[VCVT:%.*]] = sitofp i32 %a to half
+float16_t test_vcvth_f16_s32 (int32_t a) {
+  return vcvth_f16_s32(a);
+}
+
+// CHECK-LABEL: test_vcvth_f16_u32
+// CHECK:  [[VCVT:%.*]] = uitofp i32 %a to half
+float16_t test_vcvth_f16_u32 (uint32_t a) {
+  return vcvth_f16_u32(a);
+}
+
+// CHECK-LABEL: test_vcvth_s32_f16
+// CHECK:  [[VCVT:%.*]] = fptosi half %{{.*}} to i32
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvth_s32_f16 (float16_t a) {
+  return vcvth_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvth_u32_f16
+// CHECK:  [[VCVT:%.*]] = fptoui half %{{.*}} to i32
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvth_u32_f16 (float16_t a) {
+  return vcvth_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtah_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtas.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtah_s32_f16 (float16_t a) {
+  return vcvtah_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtah_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtau.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtah_u32_f16 (float16_t a) {
+  return vcvtah_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmh_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtms.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtmh_s32_f16 (float16_t a) {
+  return vcvtmh_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmh_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtmu.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtmh_u32_f16 (float16_t a) {
+  return vcvtmh_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnh_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtns.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtnh_s32_f16 (float16_t a) {
+  return vcvtnh_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnh_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtnu.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtnh_u32_f16 (float16_t a) {
+  return vcvtnh_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtph_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtps.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtph_s32_f16 (float16_t a) {
+  return vcvtph_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtph_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtpu.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtph_u32_f16 (float16_t a) {
+  return vcvtph_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vnegh_f16
+// CHECK: [[NEG:%.*]] = fsub half 0xH8000, %a
+float16_t test_vnegh_f16(float16_t a) {
+  return vnegh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndh_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.trunc.f16(half %{{.*}})
+float16_t test_vrndh_f16(float16_t a) {
+  return vrndh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndah_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.round.f16(half %{{.*}})
+float16_t test_vrndah_f16(float16_t a) {
+  return vrndah_f16(a);
+}
+
+// CHECK-LABEL: test_vrndih_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.nearbyint.f16(half %{{.*}})
+float16_t test_vrndih_f16(float16_t a) {
+  return vrndih_f16(a);
+}
+
+// CHECK-LABEL: test_vrndmh_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.floor.f16(half %{{.*}})
+float16_t test_vrndmh_f16(float16_t a) {
+  return vrndmh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndnh_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.arm.neon.vrintn.f16(half %{{.*}})
+float16_t test_vrndnh_f16(float16_t a) {
+  return vrndnh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndph_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.ceil.f16(half %{{.*}})
+float16_t test_vrndph_f16(float16_t a) {
+  return vrndph_f16(a);
+}
+
+// CHECK-LABEL: test_vrndxh_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.rint.f16(half %{{.*}})
+float16_t test_vrndxh_f16(float16_t a) {
+  return vrndxh_f16(a);
+}
+
+// CHECK-LABEL: test_vsqrth_f16
+// CHECK:  [[SQR:%.*]] = call half @llvm.sqrt.f16(half %{{.*}})
+float16_t test_vsqrth_f16(float16_t a) {
+  return vsqrth_f16(a);
+}
+
+// CHECK-LABEL: test_vaddh_f16
+// CHECK:  [[ADD:%.*]] = fadd half [[A:%.*]], [[B:%.*]]
+float16_t test_vaddh_f16(float16_t a, float16_t b) {
+  return vaddh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcvth_n_f16_s32
+// CHECK:  [[CVT:%.*]] = call half @llvm.arm.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1)
+float16_t test_vcvth_n_f16_s32(int32_t a) {
+  return vcvth_n_f16_s32(a, 1);
+}
+
+// CHECK-LABEL: test_vcvth_n_s32_f16
+// CHECK:  [[CVT:%.*]] = call i32 @llvm.arm.neon.vcvtfp2fxs.i32.f16(half {{%.*}}, i32 1)
+// CHECK:  ret i32 [[CVT]]
+int32_t test_vcvth_n_s32_f16(float16_t a) {
+  return vcvth_n_s32_f16(a, 1);
+}
+
+// CHECK-LABEL: test_vcvth_n_f16_u32
+// CHECK:  [[CVT:%.*]] = call half @llvm.arm.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1)
+float16_t test_vcvth_n_f16_u32(uint32_t a) {
+  return vcvth_n_f16_u32(a, 1);
+}
+
+// CHECK-LABEL: test_vcvth_n_u32_f16
+// CHECK:  [[CVT:%.*]] = call i32 @llvm.arm.neon.vcvtfp2fxu.i32.f16(half {{%.*}}, i32 1)
+// CHECK:  ret i32 [[CVT]]
+uint32_t test_vcvth_n_u32_f16(float16_t a) {
+  return vcvth_n_u32_f16(a, 1);
+}
+
+// CHECK-LABEL: test_vdivh_f16
+// CHECK:  [[DIV:%.*]] = fdiv half [[A:%.*]], [[B:%.*]]
+float16_t test_vdivh_f16(float16_t a, float16_t b) {
+  return vdivh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmaxnmh_f16
+// CHECK:  [[MAX:%.*]] = call half @llvm.arm.neon.vmaxnm.f16(half [[A:%.*]], half [[B:%.*]])
+float16_t test_vmaxnmh_f16(float16_t a, float16_t b) {
+  return vmaxnmh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vminnmh_f16
+// CHECK:  [[MIN:%.*]] = call half @llvm.arm.neon.vminnm.f16(half [[A:%.*]], half [[B:%.*]])
+float16_t test_vminnmh_f16(float16_t a, float16_t b) {
+  return vminnmh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulh_f16
+// CHECK:  [[MUL:%.*]] = fmul half [[A:%.*]], [[B:%.*]]
+float16_t test_vmulh_f16(float16_t a, float16_t b) {
+  return vmulh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vsubh_f16
+// CHECK:  [[SUB:%.*]] = fsub half [[A:%.*]], [[B:%.*]]
+float16_t test_vsubh_f16(float16_t a, float16_t b) {
+  return vsubh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vfmah_f16
+// CHECK:  [[FMA:%.*]] = call half @llvm.fma.f16(half [[A:%.*]], half [[B:%.*]], half [[C:%.*]])
+float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
+  return vfmah_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmsh_f16
+// CHECK:  [[SUB:%.*]] = fsub half 0xH8000, %b
+// CHECK:  [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[C:%.*]], half [[A:%.*]])
+float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
+  return vfmsh_f16(a, b, c);
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -4221,6 +4221,21 @@
   NEONMAP0(vzipq_v)
 };
 
+static const NeonIntrinsicInfo ARMSISDIntrinsicMap[] = {
+  NEONMAP1(vcvtah_s32_f16, arm_neon_vcvtas, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtah_u32_f16, arm_neon_vcvtau, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_f16_s32, arm_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_f16_u32, arm_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_s32_f16, arm_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_u32_f16, arm_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtmh_s32_f16, arm_neon_vcvtms, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtmh_u32_f16, arm_neon_vcvtmu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtnh_s32_f16, arm_neon_vcvtns, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtnh_u32_f16, arm_neon_vcvtnu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtph_s32_f16, arm_neon_vcvtps, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtph_u32_f16, arm_neon_vcvtpu, AddRetType | Add1ArgType),
+};
+
 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
@@ -5906,6 +5921,121 @@
     }
   }
 
+  // Emit the ARM SISD builtins with identical semantics.
+  auto SISDMap = makeArrayRef(ARMSISDIntrinsicMap);
+  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(SISDMap, BuiltinID,
+      AArch64SISDIntrinsicsProvenSorted);
+  if (Builtin) {
+    Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
+    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
+    assert(Result && "SISD intrinsic should have been handled");
+    return Result;
+  }
+
+  // fp16 scalar intrinsics
+  bool usgn = false;
+  switch (BuiltinID) {
+  default: break;
+  case NEON::BI__builtin_neon_vabsh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
+  case NEON::BI__builtin_neon_vcvth_u32_f16:
+    usgn = true;
+    // FALL THROUGH
+  case NEON::BI__builtin_neon_vcvth_s32_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+    if (usgn)
+      return Builder.CreateFPToUI(Ops[0], Int32Ty);
+    return Builder.CreateFPToSI(Ops[0], Int32Ty);
+  }
+  case NEON::BI__builtin_neon_vcvth_f16_u32:
+    usgn = true;
+    // FALL THROUGH
+  case NEON::BI__builtin_neon_vcvth_f16_s32: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = Builder.CreateBitCast(Ops[0], Int32Ty);
+    if (usgn)
+      return Builder.CreateUIToFP(Ops[0], HalfTy);
+    return Builder.CreateSIToFP(Ops[0], HalfTy);
+  }
+  case NEON::BI__builtin_neon_vnegh_f16:
+    return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
+  case NEON::BI__builtin_neon_vrndh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::trunc;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
+  }
+  case NEON::BI__builtin_neon_vrndah_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::round;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
+  }
+  case NEON::BI__builtin_neon_vrndih_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::nearbyint;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
+  }
+  case NEON::BI__builtin_neon_vrndmh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::floor;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
+  }
+  case NEON::BI__builtin_neon_vrndnh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::arm_neon_vrintn;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
+  }
+  case NEON::BI__builtin_neon_vrndph_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::ceil;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
+  }
+  case NEON::BI__builtin_neon_vrndxh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::rint;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
+  }
+  case NEON::BI__builtin_neon_vsqrth_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::sqrt;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
+  }
+  case NEON::BI__builtin_neon_vaddh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
+  case NEON::BI__builtin_neon_vsubh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
+  case NEON::BI__builtin_neon_vmulh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
+  case NEON::BI__builtin_neon_vdivh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
+  case NEON::BI__builtin_neon_vminnmh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    unsigned Int = Intrinsic::arm_neon_vminnm;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
+  }
+  case NEON::BI__builtin_neon_vmaxnmh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    unsigned Int = Intrinsic::arm_neon_vmaxnm;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
+  }
+  case NEON::BI__builtin_neon_vfmah_f16: {
+    Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+    return Builder.CreateCall(F,
+      {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
+  }
+  case NEON::BI__builtin_neon_vfmsh_f16: {
+    Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
+    Value *Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
+    return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
+  }
+  }
+
   switch (BuiltinID) {
   default: break;
 
@@ -6013,7 +6143,7 @@
 
   // Determine the type of this overloaded NEON intrinsic.
   NeonTypeFlags Type(Result.getZExtValue());
-  bool usgn = Type.isUnsigned();
+  usgn = Type.isUnsigned();
   bool rightShift = false;
 
   llvm::VectorType *VTy = GetNeonType(this, Type,
@@ -6025,7 +6155,7 @@
   // Many NEON builtins have identical semantics and uses in ARM and
   // AArch64. Emit these in a single function.
   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
-  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
+  Builtin = findNeonIntrinsicInMap(
       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
   if (Builtin)
     return EmitCommonNeonBuiltinExpr(
Index: clang/include/clang/Basic/arm_fp16.td
===================================================================
--- clang/include/clang/Basic/arm_fp16.td
+++ clang/include/clang/Basic/arm_fp16.td
@@ -14,16 +14,63 @@
 
 include "arm_neon_incl.td"
 
-// ARMv8.2-A FP16 intrinsics.
-let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in {
-
+// ARMv8.2-A FP16 intrinsics for A32/A64.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)" in {
   // Negate
   def VNEGSH          : SInst<"vneg", "ss", "Sh">;
 
-  // Reciprocal/Sqrt
-  def SCALAR_FRECPSH  : IInst<"vrecps", "sss", "Sh">;
+  // Sqrt
   def FSQRTSH         : SInst<"vsqrt", "ss", "Sh">;
-  def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">;
+
+  // Rounding
+  def FRINTZ_S64H     : SInst<"vrnd", "ss", "Sh">;
+  def FRINTA_S64H     : SInst<"vrnda", "ss", "Sh">;
+  def FRINTI_S64H     : SInst<"vrndi", "ss", "Sh">;
+  def FRINTM_S64H     : SInst<"vrndm", "ss", "Sh">;
+  def FRINTN_S64H     : SInst<"vrndn", "ss", "Sh">;
+  def FRINTP_S64H     : SInst<"vrndp", "ss", "Sh">;
+  def FRINTX_S64H     : SInst<"vrndx", "ss", "Sh">;
+
+  // Conversion
+  def SCALAR_SCVTFSH  : SInst<"vcvth_f16", "Ys", "iUi">;
+  def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">;
+  def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">;
+  def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">;
+  def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">;
+  def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">;
+  def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">;
+  def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">;
+  def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">;
+  def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">;
+  def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">;
+  let isVCVT_N = 1 in {
+    def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "iUi">;
+    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
+    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
+  }
+
+  // Scalar Absolute Value
+  def SCALAR_ABSH     : SInst<"vabs", "ss", "Sh">;
+
+  // Add/Sub
+  def VADDSH          : SInst<"vadd", "sss", "Sh">;
+  def VSUBHS          : SInst<"vsub", "sss", "Sh">;
+
+  // Max/Min(nm)
+  def FMAXNMHS        : SInst<"vmaxnm", "sss", "Sh">;
+  def FMINNMHS        : SInst<"vminnm", "sss", "Sh">;
+
+  // Multiplication/Division
+  def VMULHS          : SInst<"vmul", "sss", "Sh">;
+  def FDIVHS          : SInst<"vdiv", "sss",  "Sh">;
+
+  // Scalar fused multiply-add operations
+  def VFMAHS          : SInst<"vfma", "ssss", "Sh">;
+  def VFMSHS          : SInst<"vfms", "ssss", "Sh">;
+}
+
+// ARMv8.2-A FP16 intrinsics for A64 only.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in {
 
   // Reciprocal Estimate
   def SCALAR_FRECPEH  : IInst<"vrecpe", "ss", "Sh">;
@@ -34,67 +81,51 @@
   // Reciprocal Square Root Estimate
   def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">;
 
-  // Rounding
-  def FRINTZ_S64H     : SInst<"vrnd", "ss", "Sh">;
-  def FRINTA_S64H     : SInst<"vrnda", "ss", "Sh">;
-  def FRINTI_S64H     : SInst<"vrndi", "ss", "Sh">;
-  def FRINTM_S64H     : SInst<"vrndm", "ss", "Sh">;
-  def FRINTN_S64H     : SInst<"vrndn", "ss", "Sh">;
-  def FRINTP_S64H     : SInst<"vrndp", "ss", "Sh">;
-  def FRINTX_S64H     : SInst<"vrndx", "ss", "Sh">;
+  // Reciprocal
+  def SCALAR_FRECPSH  : IInst<"vrecps", "sss", "Sh">;
+  def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">;
+
+  // Comparison
+  def SCALAR_CMEQRH   : SInst<"vceq", "bss", "Sh">;
+  def SCALAR_CMEQZH   : SInst<"vceqz", "bs", "Sh">;
+  def SCALAR_CMGERH   : SInst<"vcge", "bss", "Sh">;
+  def SCALAR_CMGEZH   : SInst<"vcgez", "bs", "Sh">;
+  def SCALAR_CMGTRH   : SInst<"vcgt", "bss", "Sh">;
+  def SCALAR_CMGTZH   : SInst<"vcgtz", "bs", "Sh">;
+  def SCALAR_CMLERH   : SInst<"vcle", "bss", "Sh">;
+  def SCALAR_CMLEZH   : SInst<"vclez", "bs", "Sh">;
+  def SCALAR_CMLTH    : SInst<"vclt", "bss", "Sh">;
+  def SCALAR_CMLTZH   : SInst<"vcltz", "bs", "Sh">;
 
   // Conversion
-  def SCALAR_SCVTFSH  : SInst<"vcvth_f16", "Ys", "silUsUiUl">;
+  def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "Ys", "slUsUl">;
   def SCALAR_FCVTZSH  : SInst<"vcvt_s16", "$s", "Sh">;
-  def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">;
   def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">;
   def SCALAR_FCVTZUH  : SInst<"vcvt_u16", "bs", "Sh">;
-  def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">;
   def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">;
   def SCALAR_FCVTASH  : SInst<"vcvta_s16", "$s", "Sh">;
-  def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">;
   def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">;
   def SCALAR_FCVTAUH  : SInst<"vcvta_u16", "bs", "Sh">;
-  def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">;
   def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">;
   def SCALAR_FCVTMSH  : SInst<"vcvtm_s16", "$s", "Sh">;
-  def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">;
   def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">;
   def SCALAR_FCVTMUH  : SInst<"vcvtm_u16", "bs", "Sh">;
-  def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">;
   def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">;
   def SCALAR_FCVTNSH  : SInst<"vcvtn_s16", "$s", "Sh">;
-  def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">;
   def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">;
   def SCALAR_FCVTNUH  : SInst<"vcvtn_u16", "bs", "Sh">;
-  def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">;
   def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">;
   def SCALAR_FCVTPSH  : SInst<"vcvtp_s16", "$s", "Sh">;
-  def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">;
   def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">;
   def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "bs", "Sh">;
-  def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">;
   def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">;
   let isVCVT_N = 1 in {
-    def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "silUsUiUl">;
+    def SCALAR_SCVTFSHO1: SInst<"vcvth_n_f16", "Ysi", "slUsUl">;
     def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">;
-    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
     def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">;
     def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">;
-    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
     def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">;
   }
-  // Comparison
-  def SCALAR_CMEQRH   : SInst<"vceq", "bss", "Sh">;
-  def SCALAR_CMEQZH   : SInst<"vceqz", "bs", "Sh">;
-  def SCALAR_CMGERH   : SInst<"vcge", "bss", "Sh">;
-  def SCALAR_CMGEZH   : SInst<"vcgez", "bs", "Sh">;
-  def SCALAR_CMGTRH   : SInst<"vcgt", "bss", "Sh">;
-  def SCALAR_CMGTZH   : SInst<"vcgtz", "bs", "Sh">;
-  def SCALAR_CMLERH   : SInst<"vcle", "bss", "Sh">;
-  def SCALAR_CMLEZH   : SInst<"vclez", "bs", "Sh">;
-  def SCALAR_CMLTH    : SInst<"vclt", "bss", "Sh">;
-  def SCALAR_CMLTZH   : SInst<"vcltz", "bs", "Sh">;
 
   // Absolute Compare Mask Greater Than Or Equal
   def SCALAR_FACGEH   : IInst<"vcage", "bss", "Sh">;
@@ -104,28 +135,13 @@
   def SCALAR_FACGT    : IInst<"vcagt", "bss", "Sh">;
   def SCALAR_FACLT    : IInst<"vcalt", "bss", "Sh">;
 
-  // Scalar Absolute Value
-  def SCALAR_ABSH     : SInst<"vabs", "ss", "Sh">;
-
   // Scalar Absolute Difference
   def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">;
 
-  // Add/Sub
-  def VADDSH          : SInst<"vadd", "sss", "Sh">;
-  def VSUBHS          : SInst<"vsub", "sss", "Sh">;
-
   // Max/Min
   def VMAXHS          : SInst<"vmax", "sss", "Sh">;
   def VMINHS          : SInst<"vmin", "sss", "Sh">;
-  def FMAXNMHS        : SInst<"vmaxnm", "sss", "Sh">;
-  def FMINNMHS        : SInst<"vminnm", "sss", "Sh">;
 
-  // Multiplication/Division
-  def VMULHS          : SInst<"vmul", "sss", "Sh">;
+  // Multiplication
   def MULXHS          : SInst<"vmulx", "sss", "Sh">;
-  def FDIVHS          : SInst<"vdiv", "sss",  "Sh">;
-
-  // Vector fused multiply-add operations
-  def VFMAHS          : SInst<"vfma", "ssss", "Sh">;
-  def VFMSHS          : SInst<"vfms", "ssss", "Sh">;
 }