This is an automated email from the ASF dual-hosted git repository.

tlopex pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new d375f7483a Fix ACOS precision issue for boundary values (x=±1.0) 
(#18582)
d375f7483a is described below

commit d375f7483a6b46ee9ee77fdcdf5ae192f77f15c5
Author: Dayuxiaoshui <[email protected]>
AuthorDate: Wed Dec 17 10:33:17 2025 +0800

    Fix ACOS precision issue for boundary values (x=±1.0) (#18582)
    
    The ACOS operator was producing incorrect results for boundary values
    due to poor precision of ASIN's Taylor series expansion near x=±1.0.
    
    Root cause:
    - ASIN used a 6-term Taylor series that converges slowly near boundaries
    - ACOS was implemented as acos(x) = π/2 - asin(x), inheriting ASIN
    errors
    - At x=1.0, ASIN error of 0.354874 (22.6%) caused ACOS to output
    0.354874 instead of 0.0
    
    Solution:
    - Modified ASIN to use system library function (asinf) for |x| >= 0.5
    - Modified ACOS to use system library function (acosf) for |x| >= 0.5
    - For |x| < 0.5, continue using Taylor series (accurate in this range)
    
    This ensures high precision for boundary values while maintaining the
    existing behavior for values in the middle range.
    
    Fixes #18580
---
 src/target/llvm/intrin_rule_llvm.cc      | 34 +++++++++++++++++---
 tests/python/tir-base/test_tir_intrin.py | 53 ++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 5 deletions(-)

diff --git a/src/target/llvm/intrin_rule_llvm.cc 
b/src/target/llvm/intrin_rule_llvm.cc
index 4ce7ce9f22..a8a3d911ca 100644
--- a/src/target/llvm/intrin_rule_llvm.cc
+++ b/src/target/llvm/intrin_rule_llvm.cc
@@ -167,9 +167,15 @@ TVM_REGISTER_OP("tir.sinh")
 TVM_REGISTER_OP("tir.asin")
     .set_attr<FLegalize>("llvm.FLegalize", [](const PrimExpr& e) -> PrimExpr {
       using tir::make_const;
+      using namespace intrin;
       const tir::CallNode* call = e.as<tir::CallNode>();
       ICHECK(call != nullptr);
       const PrimExpr& x = call->args[0];
+
+      PrimExpr threshold = make_const(x.dtype(), 0.5);
+      PrimExpr abs_x = tvm::abs(x);
+      PrimExpr use_lib = abs_x >= threshold;
+
       PrimExpr x2 = x * x;
       PrimExpr term1 = x;
       PrimExpr term3 = term1 * x2 / make_const(x.dtype(), 6);
@@ -178,25 +184,43 @@ TVM_REGISTER_OP("tir.asin")
       PrimExpr term9 = term7 * x2 * make_const(x.dtype(), 1225) / 
make_const(x.dtype(), 3456);
       PrimExpr term11 = term9 * x2 * make_const(x.dtype(), 3969) / 
make_const(x.dtype(), 28160);
       PrimExpr series = term1 + term3 + term5 + term7 + term9 + term11;
-      /* --- domain limit check --- */
+
+      PrimExpr lib_result =
+          
::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e);
+
       PrimExpr lower = make_const(x.dtype(), -1.0);
       PrimExpr upper = make_const(x.dtype(), 1.0);
       PrimExpr out_range = tir::Or(x<lower, x> upper);
-      // Use a quiet NaN constant
       PrimExpr nan_const = make_const(x.dtype(), 
std::numeric_limits<double>::quiet_NaN());
-      // select: if out of [-1,1] → NaN, else → series
-      return tir::Select(out_range, nan_const, series);
+
+      return tir::Select(out_range, nan_const, tir::Select(use_lib, 
lib_result, series));
     });
 
 TVM_REGISTER_OP("tir.acos")
     .set_attr<FLegalize>("llvm.FLegalize", [](const PrimExpr& e) -> PrimExpr {
       using tir::make_const;
+      using namespace intrin;
       const tir::CallNode* call = e.as<tir::CallNode>();
       ICHECK(call != nullptr) << "Invalid call node in acos legalization";
       const PrimExpr& x = call->args[0];
+
+      PrimExpr threshold = make_const(x.dtype(), 0.5);
+      PrimExpr abs_x = tvm::abs(x);
+      PrimExpr use_lib = abs_x >= threshold;
+
       PrimExpr half_pi = make_const(x.dtype(), M_PI / 2);
       PrimExpr asin_x = asin(x);
-      return half_pi - asin_x;
+      PrimExpr formula_result = half_pi - asin_x;
+
+      PrimExpr lib_result =
+          
::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e);
+
+      PrimExpr lower = make_const(x.dtype(), -1.0);
+      PrimExpr upper = make_const(x.dtype(), 1.0);
+      PrimExpr out_range = tir::Or(x<lower, x> upper);
+      PrimExpr nan_const = make_const(x.dtype(), 
std::numeric_limits<double>::quiet_NaN());
+
+      return tir::Select(out_range, nan_const, tir::Select(use_lib, 
lib_result, formula_result));
     });
 
 TVM_REGISTER_OP("tir.atan")
diff --git a/tests/python/tir-base/test_tir_intrin.py 
b/tests/python/tir-base/test_tir_intrin.py
index 8dabdbb344..1e8c88e08e 100644
--- a/tests/python/tir-base/test_tir_intrin.py
+++ b/tests/python/tir-base/test_tir_intrin.py
@@ -135,6 +135,58 @@ def test_unary_intrin():
         run_test(*func, atol, rtol)
 
 
+def test_asin_acos_boundary_values():
+    """Test asin and acos with boundary values and threshold switching."""
+    test_funcs = [
+        (tvm.tir.asin, lambda x: np.arcsin(x)),
+        (tvm.tir.acos, lambda x: np.arccos(x)),
+    ]
+
+    def run_test(tvm_intrin, np_func):
+        m = te.var("m")
+        A = te.placeholder((m,), name="A")
+        B = te.compute((m,), lambda *i: tvm_intrin(A(*i)), name="B")
+
+        mod = te.create_prim_func([A, B])
+        sch = tir.Schedule(mod)
+        func = tvm.compile(sch.mod, target="llvm")
+
+        dev = tvm.cpu(0)
+
+        # Test boundary values: ±1.0 (should use system library)
+        boundary_values = np.array([1.0, -1.0], dtype=np.float32)
+        a1 = tvm.runtime.tensor(boundary_values, dev)
+        b1 = tvm.runtime.tensor(np.empty_like(boundary_values), dev)
+        func(a1, b1)
+        tvm.testing.assert_allclose(b1.numpy(), np_func(boundary_values), 
atol=1e-5, rtol=1e-5)
+
+        # Test values at threshold: ±0.5 (should use system library)
+        threshold_values = np.array([0.5, -0.5], dtype=np.float32)
+        a2 = tvm.runtime.tensor(threshold_values, dev)
+        b2 = tvm.runtime.tensor(np.empty_like(threshold_values), dev)
+        func(a2, b2)
+        tvm.testing.assert_allclose(b2.numpy(), np_func(threshold_values), 
atol=1e-4, rtol=1e-4)
+
+        # Test values just below threshold: ±0.49 (should use Taylor series)
+        below_threshold_values = np.array([0.49, -0.49, 0.3, -0.3, 0.0], 
dtype=np.float32)
+        a3 = tvm.runtime.tensor(below_threshold_values, dev)
+        b3 = tvm.runtime.tensor(np.empty_like(below_threshold_values), dev)
+        func(a3, b3)
+        tvm.testing.assert_allclose(
+            b3.numpy(), np_func(below_threshold_values), atol=1e-3, rtol=1e-3
+        )
+
+        # Test out-of-domain values: should return NaN
+        out_of_domain = np.array([1.1, -1.1, 2.0, -2.0], dtype=np.float32)
+        a4 = tvm.runtime.tensor(out_of_domain, dev)
+        b4 = tvm.runtime.tensor(np.empty_like(out_of_domain), dev)
+        func(a4, b4)
+        assert np.all(np.isnan(b4.numpy())), "Out-of-domain inputs should 
return NaN"
+
+    for func in test_funcs:
+        run_test(*func)
+
+
 def test_binary_intrin():
     test_funcs = [
         (tvm.tir.atan2, lambda x1, x2: np.arctan2(x1, x2)),
@@ -315,6 +367,7 @@ if __name__ == "__main__":
     test_nearbyint()
     test_unary_intrin()
     test_round_intrinsics_on_int()
+    test_asin_acos_boundary_values()
     test_binary_intrin()
     test_ldexp()
     test_clz()

Reply via email to