This is an automated email from the ASF dual-hosted git repository.
tlopex pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new d375f7483a Fix ACOS precision issue for boundary values (x=±1.0)
(#18582)
d375f7483a is described below
commit d375f7483a6b46ee9ee77fdcdf5ae192f77f15c5
Author: Dayuxiaoshui <[email protected]>
AuthorDate: Wed Dec 17 10:33:17 2025 +0800
Fix ACOS precision issue for boundary values (x=±1.0) (#18582)
The ACOS operator was producing incorrect results for boundary values
due to poor precision of ASIN's Taylor series expansion near x=±1.0.
Root cause:
- ASIN used a 6-term Taylor series that converges slowly near boundaries
- ACOS was implemented as acos(x) = π/2 - asin(x), inheriting ASIN
errors
- At x=1.0, ASIN error of 0.354874 (22.6%) caused ACOS to output
0.354874 instead of 0.0
Solution:
- Modified ASIN to use system library function (asinf) for |x| >= 0.5
- Modified ACOS to use system library function (acosf) for |x| >= 0.5
- For |x| < 0.5, continue using Taylor series (accurate in this range)
This ensures high precision for boundary values while maintaining the
existing behavior for values in the middle range.
Fixes #18580
---
src/target/llvm/intrin_rule_llvm.cc | 34 +++++++++++++++++---
tests/python/tir-base/test_tir_intrin.py | 53 ++++++++++++++++++++++++++++++++
2 files changed, 82 insertions(+), 5 deletions(-)
diff --git a/src/target/llvm/intrin_rule_llvm.cc
b/src/target/llvm/intrin_rule_llvm.cc
index 4ce7ce9f22..a8a3d911ca 100644
--- a/src/target/llvm/intrin_rule_llvm.cc
+++ b/src/target/llvm/intrin_rule_llvm.cc
@@ -167,9 +167,15 @@ TVM_REGISTER_OP("tir.sinh")
TVM_REGISTER_OP("tir.asin")
.set_attr<FLegalize>("llvm.FLegalize", [](const PrimExpr& e) -> PrimExpr {
using tir::make_const;
+ using namespace intrin;
const tir::CallNode* call = e.as<tir::CallNode>();
ICHECK(call != nullptr);
const PrimExpr& x = call->args[0];
+
+ PrimExpr threshold = make_const(x.dtype(), 0.5);
+ PrimExpr abs_x = tvm::abs(x);
+ PrimExpr use_lib = abs_x >= threshold;
+
PrimExpr x2 = x * x;
PrimExpr term1 = x;
PrimExpr term3 = term1 * x2 / make_const(x.dtype(), 6);
@@ -178,25 +184,43 @@ TVM_REGISTER_OP("tir.asin")
PrimExpr term9 = term7 * x2 * make_const(x.dtype(), 1225) /
make_const(x.dtype(), 3456);
PrimExpr term11 = term9 * x2 * make_const(x.dtype(), 3969) /
make_const(x.dtype(), 28160);
PrimExpr series = term1 + term3 + term5 + term7 + term9 + term11;
- /* --- domain limit check --- */
+
+ PrimExpr lib_result =
+
::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e);
+
PrimExpr lower = make_const(x.dtype(), -1.0);
PrimExpr upper = make_const(x.dtype(), 1.0);
PrimExpr out_range = tir::Or(x<lower, x> upper);
- // Use a quiet NaN constant
PrimExpr nan_const = make_const(x.dtype(),
std::numeric_limits<double>::quiet_NaN());
- // select: if out of [-1,1] → NaN, else → series
- return tir::Select(out_range, nan_const, series);
+
+ return tir::Select(out_range, nan_const, tir::Select(use_lib,
lib_result, series));
});
TVM_REGISTER_OP("tir.acos")
.set_attr<FLegalize>("llvm.FLegalize", [](const PrimExpr& e) -> PrimExpr {
using tir::make_const;
+ using namespace intrin;
const tir::CallNode* call = e.as<tir::CallNode>();
ICHECK(call != nullptr) << "Invalid call node in acos legalization";
const PrimExpr& x = call->args[0];
+
+ PrimExpr threshold = make_const(x.dtype(), 0.5);
+ PrimExpr abs_x = tvm::abs(x);
+ PrimExpr use_lib = abs_x >= threshold;
+
PrimExpr half_pi = make_const(x.dtype(), M_PI / 2);
PrimExpr asin_x = asin(x);
- return half_pi - asin_x;
+ PrimExpr formula_result = half_pi - asin_x;
+
+ PrimExpr lib_result =
+
::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e);
+
+ PrimExpr lower = make_const(x.dtype(), -1.0);
+ PrimExpr upper = make_const(x.dtype(), 1.0);
+ PrimExpr out_range = tir::Or(x<lower, x> upper);
+ PrimExpr nan_const = make_const(x.dtype(),
std::numeric_limits<double>::quiet_NaN());
+
+ return tir::Select(out_range, nan_const, tir::Select(use_lib,
lib_result, formula_result));
});
TVM_REGISTER_OP("tir.atan")
diff --git a/tests/python/tir-base/test_tir_intrin.py
b/tests/python/tir-base/test_tir_intrin.py
index 8dabdbb344..1e8c88e08e 100644
--- a/tests/python/tir-base/test_tir_intrin.py
+++ b/tests/python/tir-base/test_tir_intrin.py
@@ -135,6 +135,58 @@ def test_unary_intrin():
run_test(*func, atol, rtol)
+def test_asin_acos_boundary_values():
+ """Test asin and acos with boundary values and threshold switching."""
+ test_funcs = [
+ (tvm.tir.asin, lambda x: np.arcsin(x)),
+ (tvm.tir.acos, lambda x: np.arccos(x)),
+ ]
+
+ def run_test(tvm_intrin, np_func):
+ m = te.var("m")
+ A = te.placeholder((m,), name="A")
+ B = te.compute((m,), lambda *i: tvm_intrin(A(*i)), name="B")
+
+ mod = te.create_prim_func([A, B])
+ sch = tir.Schedule(mod)
+ func = tvm.compile(sch.mod, target="llvm")
+
+ dev = tvm.cpu(0)
+
+ # Test boundary values: ±1.0 (should use system library)
+ boundary_values = np.array([1.0, -1.0], dtype=np.float32)
+ a1 = tvm.runtime.tensor(boundary_values, dev)
+ b1 = tvm.runtime.tensor(np.empty_like(boundary_values), dev)
+ func(a1, b1)
+ tvm.testing.assert_allclose(b1.numpy(), np_func(boundary_values),
atol=1e-5, rtol=1e-5)
+
+ # Test values at threshold: ±0.5 (should use system library)
+ threshold_values = np.array([0.5, -0.5], dtype=np.float32)
+ a2 = tvm.runtime.tensor(threshold_values, dev)
+ b2 = tvm.runtime.tensor(np.empty_like(threshold_values), dev)
+ func(a2, b2)
+ tvm.testing.assert_allclose(b2.numpy(), np_func(threshold_values),
atol=1e-4, rtol=1e-4)
+
+ # Test values just below threshold: ±0.49 (should use Taylor series)
+ below_threshold_values = np.array([0.49, -0.49, 0.3, -0.3, 0.0],
dtype=np.float32)
+ a3 = tvm.runtime.tensor(below_threshold_values, dev)
+ b3 = tvm.runtime.tensor(np.empty_like(below_threshold_values), dev)
+ func(a3, b3)
+ tvm.testing.assert_allclose(
+ b3.numpy(), np_func(below_threshold_values), atol=1e-3, rtol=1e-3
+ )
+
+ # Test out-of-domain values: should return NaN
+ out_of_domain = np.array([1.1, -1.1, 2.0, -2.0], dtype=np.float32)
+ a4 = tvm.runtime.tensor(out_of_domain, dev)
+ b4 = tvm.runtime.tensor(np.empty_like(out_of_domain), dev)
+ func(a4, b4)
+ assert np.all(np.isnan(b4.numpy())), "Out-of-domain inputs should
return NaN"
+
+ for func in test_funcs:
+ run_test(*func)
+
+
def test_binary_intrin():
test_funcs = [
(tvm.tir.atan2, lambda x1, x2: np.arctan2(x1, x2)),
@@ -315,6 +367,7 @@ if __name__ == "__main__":
test_nearbyint()
test_unary_intrin()
test_round_intrinsics_on_int()
+ test_asin_acos_boundary_values()
test_binary_intrin()
test_ldexp()
test_clz()