================
@@ -694,29 +693,11 @@ Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
   return Builder.CreateFMA(Y0E, EFMA, Y0);
 }
 
-bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
-                                                  FastMathFlags DivFMF,
+bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(FastMathFlags DivFMF,
                                                   FastMathFlags SqrtFMF) const {
-  // The rsqrt contraction increases accuracy from ~2ulp to ~1ulp.
-  if (!DivFMF.allowContract() || !SqrtFMF.allowContract())
-    return false;
-
-  Type *EltTy = SqrtOp->getType()->getScalarType();
-  switch (EltTy->getTypeID()) {
-  case Type::FloatTyID:
-    // v_rsq_f32 gives 1ulp
-    // Separate correctly rounded fdiv + sqrt give ~1.81 ulp.
-
-    // FIXME: rsq formation should not depend on approx func or the fpmath
-    // accuracy. This strictly improves precision.
-    return SqrtFMF.approxFunc() || SqrtOp->getFPAccuracy() >= 1.0f;
-  case Type::DoubleTyID:
-    return true;
-  default:
-    return false;
-  }
-
-  llvm_unreachable("covered switch");
+  // The rsqrt contraction increases accuracy from ~2ulp to ~1ulp for f32 and
+  // f64.
+  return DivFMF.allowContract() && SqrtFMF.allowContract();
----------------
dtcxzyw wrote:

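What about f16 (rsq.f16)? The removed switch returned false for every type other than f32 and f64, whereas the new check only looks at the contract flags. IIRC the f16 path doesn't exist, right?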

https://github.com/llvm/llvm-project/pull/172082
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to