llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Wenju He (wenju-he) <details> <summary>Changes</summary> OpenCL spec relaxed half-precision divide to 1 ULP and sqrt to 1.5 ULP in https://github.com/KhronosGroup/OpenCL-Docs/pull/1293 https://github.com/KhronosGroup/OpenCL-Docs/pull/1386 --- Full diff: https://github.com/llvm/llvm-project/pull/179621.diff 3 Files Affected: - (modified) clang/lib/CodeGen/CGExpr.cpp (+8-6) - (modified) clang/test/CodeGenOpenCL/fpmath.cl (+41-3) - (modified) clang/test/CodeGenOpenCL/sqrt-fpmath.cl (+13-7) ``````````diff diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 339314ecff9cd..71a14d65c1bfe 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6979,14 +6979,15 @@ void CodeGenFunction::SetFPAccuracy(llvm::Value *Val, float Accuracy) { void CodeGenFunction::SetSqrtFPAccuracy(llvm::Value *Val) { llvm::Type *EltTy = Val->getType()->getScalarType(); - if (!EltTy->isFloatTy()) + if (!EltTy->isFloatTy() && !EltTy->isHalfTy()) return; if ((getLangOpts().OpenCL && !CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) || (getLangOpts().HIP && getLangOpts().CUDAIsDevice && !CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) { - // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 3ulp + // OpenCL v1.1 s7.4: minimum accuracy of single precision sqrt is 3 ulp. + // OpenCL v3.0 s7.4: minimum accuracy of half precision sqrt is 1.5 ulp. // // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt // build option allows an application to specify that single precision @@ -6994,20 +6995,21 @@ void CodeGenFunction::SetSqrtFPAccuracy(llvm::Value *Val) { // source are correctly rounded. // // TODO: CUDA has a prec-sqrt flag - SetFPAccuracy(Val, 3.0f); + SetFPAccuracy(Val, EltTy->isFloatTy() ? 
3.0f : 1.5f); } } void CodeGenFunction::SetDivFPAccuracy(llvm::Value *Val) { llvm::Type *EltTy = Val->getType()->getScalarType(); - if (!EltTy->isFloatTy()) + if (!EltTy->isFloatTy() && !EltTy->isHalfTy()) return; if ((getLangOpts().OpenCL && !CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) || (getLangOpts().HIP && getLangOpts().CUDAIsDevice && !CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) { - // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp + // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5 ulp. + // OpenCL v3.0 s7.4: minimum accuracy of half precision / is 1 ulp. // // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt // build option allows an application to specify that single precision @@ -7015,7 +7017,7 @@ void CodeGenFunction::SetDivFPAccuracy(llvm::Value *Val) { // source are correctly rounded. // // TODO: CUDA has a prec-div flag - SetFPAccuracy(Val, 2.5f); + SetFPAccuracy(Val, EltTy->isFloatTy() ? 2.5f : 1.f); } } diff --git a/clang/test/CodeGenOpenCL/fpmath.cl b/clang/test/CodeGenOpenCL/fpmath.cl index f3649d52e0091..5915496b3963d 100644 --- a/clang/test/CodeGenOpenCL/fpmath.cl +++ b/clang/test/CodeGenOpenCL/fpmath.cl @@ -1,8 +1,44 @@ // RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck --check-prefix=CHECK --check-prefix=NODIVOPT %s // RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s -// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s +// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP16 -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s // RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL1.2 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s +#ifndef NOFP16 
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable +typedef __attribute__(( ext_vector_type(4) )) half half4; + +half hpscalardiv(half a, half b) { + // CHECK: @hpscalardiv + // CHECK: fdiv{{.*}}, + // NODIVOPT: !fpmath ![[MD_HFDIV:[0-9]+]] + // DIVOPT-NOT: !fpmath !{{[0-9]+}} + return a / b; +} + +half4 hpvectordiv(half4 a, half4 b) { + // CHECK: @hpvectordiv + // CHECK: fdiv{{.*}}, + // NODIVOPT: !fpmath ![[MD_HFDIV]] + // DIVOPT-NOT: !fpmath !{{[0-9]+}} + return a / b; +} + +half elementwise_sqrt_f16(half a) { + // CHECK-LABEL: @elementwise_sqrt_f16 + // NODIVOPT: call half @llvm.sqrt.f16(half %{{.+}}), !fpmath ![[MD_HSQRT:[0-9]+]] + // DIVOPT: call half @llvm.sqrt.f16(half %{{.+}}){{$}} + return __builtin_elementwise_sqrt(a); +} + +half4 elementwise_sqrt_v4f16(half4 a) { + // CHECK-LABEL: @elementwise_sqrt_v4f16 + // NODIVOPT: call <4 x half> @llvm.sqrt.v4f16(<4 x half> %{{.+}}), !fpmath ![[MD_HSQRT]] + // DIVOPT: call <4 x half> @llvm.sqrt.v4f16(<4 x half> %{{.+}}){{$}} + return __builtin_elementwise_sqrt(a); +} + +#endif // NOFP16 + typedef __attribute__(( ext_vector_type(4) )) float float4; float spscalardiv(float a, float b) { @@ -30,14 +66,14 @@ float spscalarsqrt(float a) { float elementwise_sqrt_f32(float a) { // CHECK-LABEL: @elementwise_sqrt_f32 - // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]] + // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD_SQRT]] // DIVOPT: call float @llvm.sqrt.f32(float %{{.+}}){{$}} return __builtin_elementwise_sqrt(a); } float4 elementwise_sqrt_v4f32(float4 a) { // CHECK-LABEL: @elementwise_sqrt_v4f32 - // NODIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]] + // NODIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath ![[MD_SQRT]] // DIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}} return __builtin_elementwise_sqrt(a); } @@ -90,5 +126,7 @@ double4 elementwise_sqrt_v4f64(double4 a) { #endif +// 
NODIVOPT: ![[MD_HFDIV]] = !{float 1.000000e+00} +// NODIVOPT: ![[MD_HSQRT]] = !{float 1.500000e+00} // NODIVOPT: ![[MD_FDIV]] = !{float 2.500000e+00} // NODIVOPT: ![[MD_SQRT]] = !{float 3.000000e+00} diff --git a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl index d0637283a7ec1..6f4adf56930ff 100644 --- a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl +++ b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl @@ -134,46 +134,52 @@ double16 call_sqrt_v16f64(double16 x) { } -// Not for f16 // CHECK-LABEL: define {{.*}} half @call_sqrt_f16( -// CHECK: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}{{$}} +// DEFAULT: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH:\![0-9]+]]{{$}} +// CORRECTLYROUNDED: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}{{$}} half call_sqrt_f16(half x) { return sqrt(x); } // CHECK-LABEL: define {{.*}} <2 x half> @call_sqrt_v2f16( -// CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +// DEFAULT: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}} +// CORRECTLYROUNDED: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} half2 call_sqrt_v2f16(half2 x) { return sqrt(x); } // CHECK-LABEL: define {{.*}} <3 x half> @call_sqrt_v3f16( -// CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +// DEFAULT: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}} +// CORRECTLYROUNDED: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} half3 call_sqrt_v3f16(half3 x) { return sqrt(x); } // CHECK-LABEL: define {{.*}} <4 x half> @call_sqrt_v4f16( -// CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +// DEFAULT: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath 
[[HFPMATH]]{{$}} +// CORRECTLYROUNDED: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} half4 call_sqrt_v4f16(half4 x) { return sqrt(x); } // CHECK-LABEL: define {{.*}} <8 x half> @call_sqrt_v8f16( -// CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +// DEFAULT: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}} +// CORRECTLYROUNDED: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} half8 call_sqrt_v8f16(half8 x) { return sqrt(x); } // CHECK-LABEL: define {{.*}} <16 x half> @call_sqrt_v16f16( -// CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +// DEFAULT: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}} +// CORRECTLYROUNDED: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} half16 call_sqrt_v16f16(half16 x) { return sqrt(x); } // DEFAULT: [[FPMATH]] = !{float 3.000000e+00} +// DEFAULT: [[HFPMATH]] = !{float 1.500000e+00} `````````` </details> https://github.com/llvm/llvm-project/pull/179621 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
