This is an automated email from the ASF dual-hosted git repository.

tlopex pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 740eebe696 [BugFix][Target][LLVM] Use libm for asin/acos instead of buggy inline Taylor (#19567)
740eebe696 is described below

commit 740eebe69649799c7e8d7fe790fd8abeb4543735
Author: Soowon Jeong <[email protected]>
AuthorDate: Tue May 19 10:46:39 2026 +0900

    [BugFix][Target][LLVM] Use libm for asin/acos instead of buggy inline Taylor (#19567)
    
    ## Summary
    
    `tirx.asin`'s LLVM legalization used a 6-term Taylor series for
    `|x| < 0.5` with wrong recurrence coefficients. The term-to-term ratios
    in the code (`9/40`, `25/112`, `1225/3456`, `3969/28160`) don't match
    those of the real asin series, `(2n+1)²/((2n+2)(2n+3))`, i.e. `9/20`,
    `25/42`, `49/72`, `81/110`, so mid-range inputs are off by ~1e-3 in
    absolute terms — over 1000 float32 ULP. `acos` inherits the error via
    `π/2 − asin(x)`.
    
    ```
    x=0.47  ORT=0.48929077  TVM(old)=0.48820966  err=-1.08e-3
    ```
    
    The Taylor branch was added in #17945 as the initial implementation,
    with no libm fallback. #18582 later patched only `|x| ≥ 0.5` by routing
    to the libm extern, leaving the buggy mid-range in place. I see no
    evidence the inline series was an intentional fast-path.
    
    ## Fix
    
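    Drop the inline series and route the whole domain through the existing
    `asinf`/`acosf` extern. The explicit out-of-range `Select` guard is
    removed along with it; inputs outside `[-1, 1]` still yield NaN because
    libm's `asinf`/`acosf` return NaN on a domain error (IEEE-754
    platforms). Max error over `x ∈ [-1, 1]` drops to **2.4e-7**
    (ULP-grade).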
    
    ## Tests
    
    - Re-enable `Asin`/`Acos` in `test_unary` (they were commented out with
    a TODO about Taylor precision loss).
    - Existing `test_asin_acos_boundary_values` (#18582) still passes.
    
    If the inline polynomial was intentional for some target/path, please
    flag it — I'll restore it with corrected coefficients instead.
    
    `Atan` is still disabled; that's a separate `x² + 1` overflow bug
    (#19560).
    
    Fixes #19563.
---
 src/target/llvm/intrin_rule_llvm.cc      | 47 ++------------------------------
 tests/python/relax/test_frontend_onnx.py |  6 ++--
 2 files changed, 5 insertions(+), 48 deletions(-)

diff --git a/src/target/llvm/intrin_rule_llvm.cc 
b/src/target/llvm/intrin_rule_llvm.cc
index 3244deab87..ae57e8d9a6 100644
--- a/src/target/llvm/intrin_rule_llvm.cc
+++ b/src/target/llvm/intrin_rule_llvm.cc
@@ -173,61 +173,18 @@ TVM_REGISTER_OP("tirx.sinh")
 
 TVM_REGISTER_OP("tirx.asin")
     .set_attr<FLegalize>("llvm.FLegalize", [](const PrimExpr& e) -> PrimExpr {
-      using tirx::make_const;
       using namespace intrin;
       const tirx::CallNode* call = e.as<tirx::CallNode>();
       TVM_FFI_ICHECK(call != nullptr);
-      const PrimExpr& x = call->args[0];
-
-      PrimExpr threshold = make_const(x.dtype(), 0.5);
-      PrimExpr abs_x = tvm::abs(x);
-      PrimExpr use_lib = abs_x >= threshold;
-
-      PrimExpr x2 = x * x;
-      PrimExpr term1 = x;
-      PrimExpr term3 = term1 * x2 / make_const(x.dtype(), 6);
-      PrimExpr term5 = term3 * x2 * make_const(x.dtype(), 9) / 
make_const(x.dtype(), 40);
-      PrimExpr term7 = term5 * x2 * make_const(x.dtype(), 25) / 
make_const(x.dtype(), 112);
-      PrimExpr term9 = term7 * x2 * make_const(x.dtype(), 1225) / 
make_const(x.dtype(), 3456);
-      PrimExpr term11 = term9 * x2 * make_const(x.dtype(), 3969) / 
make_const(x.dtype(), 28160);
-      PrimExpr series = term1 + term3 + term5 + term7 + term9 + term11;
-
-      PrimExpr lib_result =
-          
::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e);
-
-      PrimExpr lower = make_const(x.dtype(), -1.0);
-      PrimExpr upper = make_const(x.dtype(), 1.0);
-      PrimExpr out_range = tirx::Or(x<lower, x> upper);
-      PrimExpr nan_const = make_const(x.dtype(), 
std::numeric_limits<double>::quiet_NaN());
-
-      return tirx::Select(out_range, nan_const, tirx::Select(use_lib, 
lib_result, series));
+      return 
::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e);
     });
 
 TVM_REGISTER_OP("tirx.acos")
     .set_attr<FLegalize>("llvm.FLegalize", [](const PrimExpr& e) -> PrimExpr {
-      using tirx::make_const;
       using namespace intrin;
       const tirx::CallNode* call = e.as<tirx::CallNode>();
       TVM_FFI_ICHECK(call != nullptr) << "Invalid call node in acos 
legalization";
-      const PrimExpr& x = call->args[0];
-
-      PrimExpr threshold = make_const(x.dtype(), 0.5);
-      PrimExpr abs_x = tvm::abs(x);
-      PrimExpr use_lib = abs_x >= threshold;
-
-      PrimExpr half_pi = make_const(x.dtype(), M_PI / 2);
-      PrimExpr asin_x = asin(x);
-      PrimExpr formula_result = half_pi - asin_x;
-
-      PrimExpr lib_result =
-          
::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e);
-
-      PrimExpr lower = make_const(x.dtype(), -1.0);
-      PrimExpr upper = make_const(x.dtype(), 1.0);
-      PrimExpr out_range = tirx::Or(x<lower, x> upper);
-      PrimExpr nan_const = make_const(x.dtype(), 
std::numeric_limits<double>::quiet_NaN());
-
-      return tirx::Select(out_range, nan_const, tirx::Select(use_lib, 
lib_result, formula_result));
+      return 
::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e);
     });
 
 TVM_REGISTER_OP("tirx.atan")
diff --git a/tests/python/relax/test_frontend_onnx.py 
b/tests/python/relax/test_frontend_onnx.py
index 26daeff46d..d73ec5bae5 100644
--- a/tests/python/relax/test_frontend_onnx.py
+++ b/tests/python/relax/test_frontend_onnx.py
@@ -724,9 +724,9 @@ def test_bitwise_shift(direction: str):
         "Sinh",
         "Cosh",
         "Tanh",
-        # "Asin",  // TODO @jikechao, fix the precision loss due to the Taylor 
approximation
-        # "Acos",
-        # "Atan",
+        "Asin",
+        "Acos",
+        # "Atan",  // TODO: fix x²+1 overflow in llvm legalize for huge inputs 
(issue #19560)
         "Asinh",
         "Acosh",
         "Atanh",

Reply via email to