[PATCH] D26476: [AMDGPU] Add support for f16 builtin functions for VI+

Konstantin Zhuravlyov via cfe-commits Wed, 09 Nov 2016 15:08:54 -0800

kzhuravl created this revision.
kzhuravl added reviewers: tstellarAMD, arsenm.
kzhuravl added a subscriber: cfe-commits.
Herald added subscribers: tony-tye, yaxunl, nhaehnle, wdng.


https://reviews.llvm.org/D26476

Files:
  include/clang/Basic/BuiltinsAMDGPU.def
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGenOpenCL/builtins-amdgcn-error-f16-class.cl
  test/CodeGenOpenCL/builtins-amdgcn-error-f16-cos.cl
  test/CodeGenOpenCL/builtins-amdgcn-error-f16-div-fixup.cl
  test/CodeGenOpenCL/builtins-amdgcn-error-f16-fract.cl
  test/CodeGenOpenCL/builtins-amdgcn-error-f16-frexp-exp.cl
  test/CodeGenOpenCL/builtins-amdgcn-error-f16-frexp-mant.cl
  test/CodeGenOpenCL/builtins-amdgcn-error-f16-ldexp.cl
  test/CodeGenOpenCL/builtins-amdgcn-error-f16-rcp.cl
  test/CodeGenOpenCL/builtins-amdgcn-error-f16-rsq.cl
  test/CodeGenOpenCL/builtins-amdgcn-error-f16-sin.cl
  test/CodeGenOpenCL/builtins-amdgcn-vi.cl
  test/CodeGenOpenCL/builtins-amdgcn.cl

Index: test/CodeGenOpenCL/builtins-amdgcn.cl
===================================================================
--- test/CodeGenOpenCL/builtins-amdgcn.cl
+++ test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -166,14 +166,14 @@
 }
 
 // CHECK-LABEL: @test_frexp_exp_f32
-// CHECK: call i32 @llvm.amdgcn.frexp.exp.f32
+// CHECK: call i32 @llvm.amdgcn.frexp.exp.i32.f32
 void test_frexp_exp_f32(global int* out, float a)
 {
   *out = __builtin_amdgcn_frexp_expf(a);
 }
 
 // CHECK-LABEL: @test_frexp_exp_f64
-// CHECK: call i32 @llvm.amdgcn.frexp.exp.f64
+// CHECK: call i32 @llvm.amdgcn.frexp.exp.i32.f64
 void test_frexp_exp_f64(global int* out, double a)
 {
   *out = __builtin_amdgcn_frexp_exp(a);
Index: test/CodeGenOpenCL/builtins-amdgcn-vi.cl
===================================================================
--- test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -1,8 +1,79 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -S -emit-llvm -o - %s | FileCheck %s
 
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 typedef unsigned long ulong;
 
+// CHECK-LABEL: @test_div_fixup_f16
+// CHECK: call half @llvm.amdgcn.div.fixup.f16
+void test_div_fixup_f16(global half* out, half a, half b, half c)
+{
+  *out = __builtin_amdgcn_div_fixuph(a, b, c);
+}
+
+// CHECK-LABEL: @test_rcp_f16
+// CHECK: call half @llvm.amdgcn.rcp.f16
+void test_rcp_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_rcph(a);
+}
+
+// CHECK-LABEL: @test_rsq_f16
+// CHECK: call half @llvm.amdgcn.rsq.f16
+void test_rsq_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_rsqh(a);
+}
+
+// CHECK-LABEL: @test_sin_f16
+// CHECK: call half @llvm.amdgcn.sin.f16
+void test_sin_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_sinh(a);
+}
+
+// CHECK-LABEL: @test_cos_f16
+// CHECK: call half @llvm.amdgcn.cos.f16
+void test_cos_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_cosh(a);
+}
+
+// CHECK-LABEL: @test_ldexp_f16
+// CHECK: call half @llvm.amdgcn.ldexp.f16
+void test_ldexp_f16(global half* out, half a, int b)
+{
+  *out = __builtin_amdgcn_ldexph(a, b);
+}
+
+// CHECK-LABEL: @test_frexp_mant_f16
+// CHECK: call half @llvm.amdgcn.frexp.mant.f16
+void test_frexp_mant_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_frexp_manth(a);
+}
+
+// CHECK-LABEL: @test_frexp_exp_f16
+// CHECK: call i16 @llvm.amdgcn.frexp.exp.i16.f16
+void test_frexp_exp_f16(global short* out, half a)
+{
+  *out = __builtin_amdgcn_frexp_exph(a);
+}
+
+// CHECK-LABEL: @test_fract_f16
+// CHECK: call half @llvm.amdgcn.fract.f16
+void test_fract_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_fracth(a);
+}
+
+// CHECK-LABEL: @test_class_f16
+// CHECK: call i1 @llvm.amdgcn.class.f16
+void test_class_f16(global half* out, half a, int b)
+{
+  *out = __builtin_amdgcn_classh(a, b);
+}
 
 // CHECK-LABEL: @test_s_memrealtime
 // CHECK: call i64 @llvm.amdgcn.s.memrealtime()
Index: test/CodeGenOpenCL/builtins-amdgcn-error-f16-sin.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/builtins-amdgcn-error-f16-sin.cl
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-amdhsa -target-cpu tahiti -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+void test_sin_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_sinh(a); // expected-error {{'__builtin_amdgcn_sinh' needs target feature 16-bit-insts}}
+}
Index: test/CodeGenOpenCL/builtins-amdgcn-error-f16-rsq.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/builtins-amdgcn-error-f16-rsq.cl
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-amdhsa -target-cpu tahiti -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+void test_rsq_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_rsqh(a); // expected-error {{'__builtin_amdgcn_rsqh' needs target feature 16-bit-insts}}
+}
Index: test/CodeGenOpenCL/builtins-amdgcn-error-f16-rcp.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/builtins-amdgcn-error-f16-rcp.cl
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-amdhsa -target-cpu tahiti -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+void test_rcp_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_rcph(a); // expected-error {{'__builtin_amdgcn_rcph' needs target feature 16-bit-insts}}
+}
Index: test/CodeGenOpenCL/builtins-amdgcn-error-f16-ldexp.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/builtins-amdgcn-error-f16-ldexp.cl
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-amdhsa -target-cpu tahiti -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+void test_ldexp_f16(global half* out, half a, int b)
+{
+  *out = __builtin_amdgcn_ldexph(a, b); // expected-error {{'__builtin_amdgcn_ldexph' needs target feature 16-bit-insts}}
+}
Index: test/CodeGenOpenCL/builtins-amdgcn-error-f16-frexp-mant.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/builtins-amdgcn-error-f16-frexp-mant.cl
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-amdhsa -target-cpu tahiti -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+void test_frexp_mant_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_frexp_manth(a); // expected-error {{'__builtin_amdgcn_frexp_manth' needs target feature 16-bit-insts}}
+}
Index: test/CodeGenOpenCL/builtins-amdgcn-error-f16-frexp-exp.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/builtins-amdgcn-error-f16-frexp-exp.cl
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-amdhsa -target-cpu tahiti -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+void test_frexp_exp_f16(global short* out, half a)
+{
+  *out = __builtin_amdgcn_frexp_exph(a); // expected-error {{'__builtin_amdgcn_frexp_exph' needs target feature 16-bit-insts}}
+}
Index: test/CodeGenOpenCL/builtins-amdgcn-error-f16-fract.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/builtins-amdgcn-error-f16-fract.cl
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-amdhsa -target-cpu tahiti -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+void test_fract_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_fracth(a); // expected-error {{'__builtin_amdgcn_fracth' needs target feature 16-bit-insts}}
+}
Index: test/CodeGenOpenCL/builtins-amdgcn-error-f16-div-fixup.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/builtins-amdgcn-error-f16-div-fixup.cl
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-amdhsa -target-cpu tahiti -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+void test_div_fixup_f16(global half* out, half a, half b, half c)
+{
+  *out = __builtin_amdgcn_div_fixuph(a, b, c); // expected-error {{'__builtin_amdgcn_div_fixuph' needs target feature 16-bit-insts}}
+}
Index: test/CodeGenOpenCL/builtins-amdgcn-error-f16-cos.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/builtins-amdgcn-error-f16-cos.cl
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-amdhsa -target-cpu tahiti -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+void test_cos_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_cosh(a); // expected-error {{'__builtin_amdgcn_cosh' needs target feature 16-bit-insts}}
+}
Index: test/CodeGenOpenCL/builtins-amdgcn-error-f16-class.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/builtins-amdgcn-error-f16-class.cl
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-amdhsa -target-cpu tahiti -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+void test_class_f16(global half* out, half a, int b)
+{
+  *out = __builtin_amdgcn_classh(a, b); // expected-error {{'__builtin_amdgcn_classh' needs target feature 16-bit-insts}}
+}
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -8200,38 +8200,55 @@
     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
   case AMDGPU::BI__builtin_amdgcn_div_fixup:
   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
+  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
   case AMDGPU::BI__builtin_amdgcn_trig_preop:
   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
   case AMDGPU::BI__builtin_amdgcn_rcp:
   case AMDGPU::BI__builtin_amdgcn_rcpf:
+  case AMDGPU::BI__builtin_amdgcn_rcph:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
   case AMDGPU::BI__builtin_amdgcn_rsq:
   case AMDGPU::BI__builtin_amdgcn_rsqf:
+  case AMDGPU::BI__builtin_amdgcn_rsqh:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
   case AMDGPU::BI__builtin_amdgcn_sinf:
+  case AMDGPU::BI__builtin_amdgcn_sinh:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
   case AMDGPU::BI__builtin_amdgcn_cosf:
+  case AMDGPU::BI__builtin_amdgcn_cosh:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
   case AMDGPU::BI__builtin_amdgcn_log_clampf:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
   case AMDGPU::BI__builtin_amdgcn_ldexp:
   case AMDGPU::BI__builtin_amdgcn_ldexpf:
+  case AMDGPU::BI__builtin_amdgcn_ldexph:
     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
-  case AMDGPU::BI__builtin_amdgcn_frexp_mantf: {
+  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
+  case AMDGPU::BI__builtin_amdgcn_frexp_manth: {
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
   }
   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
-    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp);
+    Value *Src0 = EmitScalarExpr(E->getArg(0));
+    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
+                                { Builder.getInt32Ty(), Src0->getType() });
+    return Builder.CreateCall(F, Src0);
+  }
+  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
+    Value *Src0 = EmitScalarExpr(E->getArg(0));
+    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
+                                { Builder.getInt16Ty(), Src0->getType() });
+    return Builder.CreateCall(F, Src0);
   }
   case AMDGPU::BI__builtin_amdgcn_fract:
   case AMDGPU::BI__builtin_amdgcn_fractf:
+  case AMDGPU::BI__builtin_amdgcn_fracth:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
   case AMDGPU::BI__builtin_amdgcn_lerp:
     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
@@ -8245,6 +8262,7 @@
     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
   case AMDGPU::BI__builtin_amdgcn_class:
   case AMDGPU::BI__builtin_amdgcn_classf:
+  case AMDGPU::BI__builtin_amdgcn_classh:
     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
 
   case AMDGPU::BI__builtin_amdgcn_read_exec: {
Index: include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -84,6 +84,16 @@
 // VI+ only builtins.
 //===----------------------------------------------------------------------===//
 
+TARGET_BUILTIN(__builtin_amdgcn_div_fixuph, "hhhh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_rcph, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_rsqh, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_sinh, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cosh, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_ldexph, "hhi", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_frexp_manth, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_frexp_exph, "sh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_fracth, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime")
 
 //===----------------------------------------------------------------------===//

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D26476: [AMDGPU] Add support for f16 builtin functions for VI+

Reply via email to