arsenm created this revision.
arsenm added reviewers: yaxunl, svenvh, Anastasia.
Herald added a subscriber: Naghasan.
Herald added a project: All.
arsenm requested review of this revision.
Herald added a subscriber: wdng.

We want the !fpmath metadata to be attached to the sqrt intrinsic call
so that it is preserved all the way to backend lowering.

      

This doesn't work in the default case with -fdeclare-opencl-builtins.

      

Fixes #64264


https://reviews.llvm.org/D156743

Files:
  clang/lib/Headers/opencl-c.h
  clang/test/CodeGenOpenCL/sqrt-fpmath.cl

Index: clang/test/CodeGenOpenCL/sqrt-fpmath.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/sqrt-fpmath.cl
@@ -0,0 +1,119 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: amdgpu-registered-target
+
+// Test with -fdeclare-opencl-builtins
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s
+
+// Test without -fdeclare-opencl-builtins
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -finclude-default-header -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// CHECK-LABEL: define dso_local float @call_sqrt_f32
+// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CALL:%.*]] = tail call float @_Z4sqrtf(float noundef [[X]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT:    ret float [[CALL]]
+//
+float call_sqrt_f32(float x) {
+  return sqrt(x);
+}
+
+// CHECK-LABEL: define dso_local <2 x float> @call_sqrt_v2f32
+// CHECK-SAME: (<2 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef [[X]]) #[[ATTR2]]
+// CHECK-NEXT:    ret <2 x float> [[CALL]]
+//
+float2 call_sqrt_v2f32(float2 x) {
+  return sqrt(x);
+}
+
+// CHECK-LABEL: define dso_local <3 x float> @call_sqrt_v3f32
+// CHECK-SAME: (<3 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef [[X]]) #[[ATTR2]]
+// CHECK-NEXT:    ret <3 x float> [[CALL]]
+//
+float3 call_sqrt_v3f32(float3 x) {
+  return sqrt(x);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @call_sqrt_v4f32
+// CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CALL:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef [[X]]) #[[ATTR2]]
+// CHECK-NEXT:    ret <4 x float> [[CALL]]
+//
+float4 call_sqrt_v4f32(float4 x) {
+  return sqrt(x);
+}
+
+// CHECK-LABEL: define dso_local <8 x float> @call_sqrt_v8f32
+// CHECK-SAME: (<8 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CALL:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef [[X]]) #[[ATTR2]]
+// CHECK-NEXT:    ret <8 x float> [[CALL]]
+//
+float8 call_sqrt_v8f32(float8 x) {
+  return sqrt(x);
+}
+
+// CHECK-LABEL: define dso_local <16 x float> @call_sqrt_v16f32
+// CHECK-SAME: (<16 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CALL:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef [[X]]) #[[ATTR2]]
+// CHECK-NEXT:    ret <16 x float> [[CALL]]
+//
+float16 call_sqrt_v16f32(float16 x) {
+  return sqrt(x);
+}
+
+// Not for f64
+// CHECK-LABEL: define dso_local double @call_sqrt_f64
+// CHECK-SAME: (double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CALL:%.*]] = tail call double @_Z4sqrtd(double noundef [[X]]) #[[ATTR2]]
+// CHECK-NEXT:    ret double [[CALL]]
+//
+double call_sqrt_f64(double x) {
+  return sqrt(x);
+}
+
+// Not for f64
+// CHECK-LABEL: define dso_local <2 x double> @call_sqrt_v2f64
+// CHECK-SAME: (<2 x double> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> noundef [[X]]) #[[ATTR2]]
+// CHECK-NEXT:    ret <2 x double> [[CALL]]
+//
+double2 call_sqrt_v2f64(double2 x) {
+  return sqrt(x);
+}
+
+// Not for f16
+// CHECK-LABEL: define dso_local half @call_sqrt_f16
+// CHECK-SAME: (half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CALL:%.*]] = tail call half @_Z4sqrtDh(half noundef [[X]]) #[[ATTR2]]
+// CHECK-NEXT:    ret half [[CALL]]
+//
+half call_sqrt_f16(half x) {
+  return sqrt(x);
+}
+
+// Not for f16
+// CHECK-LABEL: define dso_local <2 x half> @call_sqrt_v2f16
+// CHECK-SAME: (<2 x half> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef [[X]]) #[[ATTR2]]
+// CHECK-NEXT:    ret <2 x half> [[CALL]]
+//
+half2 call_sqrt_v2f16(half2 x) {
+  return sqrt(x);
+}
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// CORRECTLYROUNDED: {{.*}}
+// DEFAULT: {{.*}}
Index: clang/lib/Headers/opencl-c.h
===================================================================
--- clang/lib/Headers/opencl-c.h
+++ clang/lib/Headers/opencl-c.h
@@ -8498,13 +8498,35 @@
 
 /**
  * Compute square root.
+ *
+ * Provide inline implementations using the builtin so that we get the
+ * appropriate !fpmath metadata based on -cl-fp32-correctly-rounded-divide-sqrt.
+ * The implementation should still provide an external definition.
  */
-float __ovld __cnfn sqrt(float);
-float2 __ovld __cnfn sqrt(float2);
-float3 __ovld __cnfn sqrt(float3);
-float4 __ovld __cnfn sqrt(float4);
-float8 __ovld __cnfn sqrt(float8);
-float16 __ovld __cnfn sqrt(float16);
+inline float __ovld __cnfn sqrt(float __x) {
+  return __builtin_elementwise_sqrt(__x);
+}
+
+inline float2 __ovld __cnfn sqrt(float2 __x) {
+  return __builtin_elementwise_sqrt(__x);
+}
+
+inline float3 __ovld __cnfn sqrt(float3 __x) {
+  return __builtin_elementwise_sqrt(__x);
+}
+
+inline float4 __ovld __cnfn sqrt(float4 __x) {
+  return __builtin_elementwise_sqrt(__x);
+}
+
+inline float8 __ovld __cnfn sqrt(float8 __x) {
+  return __builtin_elementwise_sqrt(__x);
+}
+
+inline float16 __ovld __cnfn sqrt(float16 __x) {
+  return __builtin_elementwise_sqrt(__x);
+}
+
 #ifdef cl_khr_fp64
 double __ovld __cnfn sqrt(double);
 double2 __ovld __cnfn sqrt(double2);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to