[PATCH] D154495: clang: Attach !fpmath metadata to __builtin_sqrt based on language flags

2023-07-14 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm closed this revision.
arsenm added a comment.

bac2a075408377a8aa41f6626b17bb3e471221f3 



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154495/new/

https://reviews.llvm.org/D154495

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154495: clang: Attach !fpmath metadata to __builtin_sqrt based on language flags

2023-07-14 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl accepted this revision.
yaxunl added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154495/new/

https://reviews.llvm.org/D154495

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154495: clang: Attach !fpmath metadata to __builtin_sqrt based on language flags

2023-07-14 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added a comment.

ping


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154495/new/

https://reviews.llvm.org/D154495

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154495: clang: Attach !fpmath metadata to __builtin_sqrt based on language flags

2023-07-07 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added a comment.

In D154495#4479481 , @jdoerfert wrote:

> FWIW, I assume we want this also for OpenMP offload.

I'd be surprised if OpenMP let you do this by default


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154495/new/

https://reviews.llvm.org/D154495

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154495: clang: Attach !fpmath metadata to __builtin_sqrt based on language flags

2023-07-06 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

FWIW, I assume we want this also for OpenMP offload.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154495/new/

https://reviews.llvm.org/D154495

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154495: clang: Attach !fpmath metadata to __builtin_sqrt based on language flags

2023-07-06 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm updated this revision to Diff 537737.
arsenm added a comment.

Split div/sqrt handling since they have different values. Also cuda does have 
unimplemented flags to control these individually. Not sure it's worth trying 
to merge them into one function


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154495/new/

https://reviews.llvm.org/D154495

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGenCUDA/correctly-rounded-div.cu
  clang/test/CodeGenOpenCL/fpmath.cl

Index: clang/test/CodeGenOpenCL/fpmath.cl
===
--- clang/test/CodeGenOpenCL/fpmath.cl
+++ clang/test/CodeGenOpenCL/fpmath.cl
@@ -8,7 +8,7 @@
 float spscalardiv(float a, float b) {
   // CHECK: @spscalardiv
   // CHECK: fdiv{{.*}},
-  // NODIVOPT: !fpmath ![[MD:[0-9]+]]
+  // NODIVOPT: !fpmath ![[MD_FDIV:[0-9]+]]
   // DIVOPT-NOT: !fpmath !{{[0-9]+}}
   return a / b;
 }
@@ -16,11 +16,18 @@
 float4 spvectordiv(float4 a, float4 b) {
   // CHECK: @spvectordiv
   // CHECK: fdiv{{.*}},
-  // NODIVOPT: !fpmath ![[MD]]
+  // NODIVOPT: !fpmath ![[MD_FDIV]]
   // DIVOPT-NOT: !fpmath !{{[0-9]+}}
   return a / b;
 }
 
+float spscalarsqrt(float a) {
+  // CHECK-LABEL: @spscalarsqrt
+  // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]]
+  // DIVOPT: call float @llvm.sqrt.f32(float %{{.+}}){{$}}
+  return __builtin_sqrtf(a);
+}
+
 #if __OPENCL_C_VERSION__ >=120
 void printf(constant char* fmt, ...);
 
@@ -34,11 +41,27 @@
 
 #ifndef NOFP64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
+typedef __attribute__(( ext_vector_type(4) )) double double4;
+
 double dpscalardiv(double a, double b) {
   // CHECK: @dpscalardiv
   // CHECK-NOT: !fpmath
   return a / b;
 }
+
+double4 dpvectordiv(double4 a, double4 b) {
+  // CHECK: @dpvectordiv
+  // CHECK-NOT: !fpmath
+  return a / b;
+}
+
+double dpscalarsqrt(double a) {
+  // CHECK-LABEL: @dpscalarsqrt
+  // CHECK: call double @llvm.sqrt.f64(double %{{.+}}){{$}}
+  return __builtin_sqrt(a);
+}
+
 #endif
 
-// NODIVOPT: ![[MD]] = !{float 2.50e+00}
+// NODIVOPT: ![[MD_FDIV]] = !{float 2.50e+00}
+// NODIVOPT: ![[MD_SQRT]] = !{float 3.00e+00}
Index: clang/test/CodeGenCUDA/correctly-rounded-div.cu
===
--- clang/test/CodeGenCUDA/correctly-rounded-div.cu
+++ clang/test/CodeGenCUDA/correctly-rounded-div.cu
@@ -32,4 +32,18 @@
   return a / b;
 }
 
-// NCRDIV: ![[MD]] = !{float 2.50e+00}
+// COMMON-LABEL: @_Z12spscalarsqrt
+// NCRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD:[0-9]+]]
+// CRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}){{$}}
+__device__ float spscalarsqrt(float a) {
+  return __builtin_sqrtf(a);
+}
+
+// COMMON-LABEL: @_Z12dpscalarsqrt
+// COMMON: call contract double @llvm.sqrt.f64(double %{{.+}}){{$}}
+// COMMON-NOT: !fpmath
+__device__ double dpscalarsqrt(double a) {
+  return __builtin_sqrt(a);
+}
+
+// NCRSQRT: ![[MD]] = !{float 2.50e+00}
Index: clang/lib/CodeGen/CodeGenFunction.h
===
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -4704,6 +4704,14 @@
   /// point operation, expressed as the maximum relative error in ulp.
   void SetFPAccuracy(llvm::Value *Val, float Accuracy);
 
+  /// Set the minimum required accuracy of the given sqrt operation
+  /// based on CodeGenOpts.
+  void SetSqrtFPAccuracy(llvm::Value *Val);
+
+  /// Set the minimum required accuracy of the given sqrt operation based on
+  /// CodeGenOpts.
+  void SetDivFPAccuracy(llvm::Value *Val);
+
   /// Set the codegen fast-math flags.
   void SetFastMathFlags(FPOptions FPFeatures);
 
Index: clang/lib/CodeGen/CGExprScalar.cpp
===
--- clang/lib/CodeGen/CGExprScalar.cpp
+++ clang/lib/CodeGen/CGExprScalar.cpp
@@ -3478,21 +3478,7 @@
 llvm::Value *Val;
 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
 Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
-if ((CGF.getLangOpts().OpenCL &&
- !CGF.CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
-(CGF.getLangOpts().HIP && CGF.getLangOpts().CUDAIsDevice &&
- !CGF.CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
-  // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp
-  // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
-  // build option allows an application to specify that single precision
-  // floating-point divide (x/y and 1/x) and sqrt used in the program
-  // source are correctly rounded.
-  llvm::Type *ValTy = Val->getType();
-  if (ValTy->isFloatTy() ||
-  (isa(ValTy) &&
-   cast(ValTy)->getElementType()->isFloatTy()))
-CGF.SetFPAccuracy(Val, 2.5);
-   

[PATCH] D154495: clang: Attach !fpmath metadata to __builtin_sqrt based on language flags

2023-07-05 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added inline comments.



Comment at: clang/lib/CodeGen/CGExpr.cpp:5602
+// source are correctly rounded.
+SetFPAccuracy(Val, 2.5);
+  }

arsenm wrote:
> yaxunl wrote:
> > the spec says sqrt relative error is 3ULP 
> > https://registry.khronos.org/OpenCL/specs/2.2/html/OpenCL_C.html#relative-error-as-ulps
> Did that change between versions? In any case I don’t want to change the 
> currently used threshold in this patch. We only need 1.0 anyway 
Oh, I see the threshold is 2.5 for fdiv and 3.0 for sqrt.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154495/new/

https://reviews.llvm.org/D154495

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154495: clang: Attach !fpmath metadata to __builtin_sqrt based on language flags

2023-07-05 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added inline comments.



Comment at: clang/lib/CodeGen/CGExpr.cpp:5602
+// source are correctly rounded.
+SetFPAccuracy(Val, 2.5);
+  }

yaxunl wrote:
> the spec says sqrt relative error is 3ULP 
> https://registry.khronos.org/OpenCL/specs/2.2/html/OpenCL_C.html#relative-error-as-ulps
Did that change between versions? In any case I don’t want to change the 
currently used threshold in this patch. We only need 1.0 anyway 


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154495/new/

https://reviews.llvm.org/D154495

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154495: clang: Attach !fpmath metadata to __builtin_sqrt based on language flags

2023-07-05 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added inline comments.



Comment at: clang/lib/CodeGen/CGExpr.cpp:5602
+// source are correctly rounded.
+SetFPAccuracy(Val, 2.5);
+  }

the spec says sqrt relative error is 3ULP 
https://registry.khronos.org/OpenCL/specs/2.2/html/OpenCL_C.html#relative-error-as-ulps


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154495/new/

https://reviews.llvm.org/D154495

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154495: clang: Attach !fpmath metadata to __builtin_sqrt based on language flags

2023-07-05 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm created this revision.
arsenm added reviewers: yaxunl, Anastasia, jcranmer-intel, tra, jlebar, jhuber6.
Herald added a project: All.
arsenm requested review of this revision.
Herald added subscribers: jplehr, sstefan1, wdng.
Herald added a reviewer: jdoerfert.

OpenCL and HIP have -cl-fp32-correctly-rounded-divide-sqrt and
-fno-hip-correctly-rounded-divide-sqrt. The corresponding fpmath metadata
was only set on fdiv, and not sqrt. The backend is currently underutilizing
sqrt lowering options, and the responsibility is split between the libraries
and backend and this metadata is needed.

  

CUDA/NVCC has -prec-div and -prev-sqrt but clang doesn't appear to be
aiming for compatibility with those. Don't know if OpenMP has a similar
control.


https://reviews.llvm.org/D154495

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGenCUDA/correctly-rounded-div.cu
  clang/test/CodeGenOpenCL/fpmath.cl

Index: clang/test/CodeGenOpenCL/fpmath.cl
===
--- clang/test/CodeGenOpenCL/fpmath.cl
+++ clang/test/CodeGenOpenCL/fpmath.cl
@@ -21,6 +21,13 @@
   return a / b;
 }
 
+float spscalarsqrt(float a) {
+  // CHECK-LABEL: @spscalarsqrt
+  // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD:[0-9]+]]
+  // DIVOPT: call float @llvm.sqrt.f32(float %{{.+}}){{$}}
+  return __builtin_sqrtf(a);
+}
+
 #if __OPENCL_C_VERSION__ >=120
 void printf(constant char* fmt, ...);
 
@@ -34,11 +41,26 @@
 
 #ifndef NOFP64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
+typedef __attribute__(( ext_vector_type(4) )) double double4;
+
 double dpscalardiv(double a, double b) {
   // CHECK: @dpscalardiv
   // CHECK-NOT: !fpmath
   return a / b;
 }
+
+double4 dpvectordiv(double4 a, double4 b) {
+  // CHECK: @dpvectordiv
+  // CHECK-NOT: !fpmath
+  return a / b;
+}
+
+double dpscalarsqrt(double a) {
+  // CHECK-LABEL: @dpscalarsqrt
+  // CHECK: call double @llvm.sqrt.f64(double %{{.+}}){{$}}
+  return __builtin_sqrt(a);
+}
+
 #endif
 
 // NODIVOPT: ![[MD]] = !{float 2.50e+00}
Index: clang/test/CodeGenCUDA/correctly-rounded-div.cu
===
--- clang/test/CodeGenCUDA/correctly-rounded-div.cu
+++ clang/test/CodeGenCUDA/correctly-rounded-div.cu
@@ -32,4 +32,18 @@
   return a / b;
 }
 
-// NCRDIV: ![[MD]] = !{float 2.50e+00}
+// COMMON-LABEL: @_Z12spscalarsqrt
+// NCRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD:[0-9]+]]
+// CRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}){{$}}
+__device__ float spscalarsqrt(float a) {
+  return __builtin_sqrtf(a);
+}
+
+// COMMON-LABEL: @_Z12dpscalarsqrt
+// COMMON: call contract double @llvm.sqrt.f64(double %{{.+}}){{$}}
+// COMMON-NOT: !fpmath
+__device__ double dpscalarsqrt(double a) {
+  return __builtin_sqrt(a);
+}
+
+// NCRSQRT: ![[MD]] = !{float 2.50e+00}
Index: clang/lib/CodeGen/CodeGenFunction.h
===
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -4688,6 +4688,10 @@
   /// point operation, expressed as the maximum relative error in ulp.
   void SetFPAccuracy(llvm::Value *Val, float Accuracy);
 
+  /// SetFPAccuracy - Set the minimum required accuracy of the given fdiv or
+  /// sqrt operation based on CodeGenOpts.
+  void SetSqrtOrDivFPAccuracy(llvm::Value *Val);
+
   /// Set the codegen fast-math flags.
   void SetFastMathFlags(FPOptions FPFeatures);
 
Index: clang/lib/CodeGen/CGExprScalar.cpp
===
--- clang/lib/CodeGen/CGExprScalar.cpp
+++ clang/lib/CodeGen/CGExprScalar.cpp
@@ -3478,21 +3478,7 @@
 llvm::Value *Val;
 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
 Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
-if ((CGF.getLangOpts().OpenCL &&
- !CGF.CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
-(CGF.getLangOpts().HIP && CGF.getLangOpts().CUDAIsDevice &&
- !CGF.CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
-  // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp
-  // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
-  // build option allows an application to specify that single precision
-  // floating-point divide (x/y and 1/x) and sqrt used in the program
-  // source are correctly rounded.
-  llvm::Type *ValTy = Val->getType();
-  if (ValTy->isFloatTy() ||
-  (isa(ValTy) &&
-   cast(ValTy)->getElementType()->isFloatTy()))
-CGF.SetFPAccuracy(Val, 2.5);
-}
+CGF.SetSqrtOrDivFPAccuracy(Val);
 return Val;
   }
   else if (Ops.isFixedPointOp())
Index: clang/lib/CodeGen/CGExpr.cpp
===
--- clang/lib/CodeGen/CGExpr.cpp