This is an automated email from the ASF dual-hosted git repository.
tlopex pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 740eebe696 [BugFix][Target][LLVM] Use libm for asin/acos instead of
buggy inline Taylor (#19567)
740eebe696 is described below
commit 740eebe69649799c7e8d7fe790fd8abeb4543735
Author: Soowon Jeong <[email protected]>
AuthorDate: Tue May 19 10:46:39 2026 +0900
[BugFix][Target][LLVM] Use libm for asin/acos instead of buggy inline
Taylor (#19567)
## Summary
`tirx.asin`'s LLVM legalize used a 6-term Taylor series for `|x| < 0.5`
with wrong recurrence coefficients. The ratios in the code (`9/40`,
`25/112`, `1225/3456`, `3969/28160`) don't match the real asin series
(`9/20`, `25/42`, `49/72`, `81/110`), so mid-range inputs lose ~1e-3 of
precision — over 1000 float32 ULP. `acos` inherits it via `π/2 −
asin(x)`.
```
x=0.47 ORT=0.48929077 TVM(old)=0.48820966 err=-1.08e-3
```
The Taylor branch was added in #17945 as the initial implementation,
with no libm fallback. #18582 later patched only `|x| ≥ 0.5` by routing
to the libm extern, leaving the buggy mid-range in place. I see no
evidence the inline series was an intentional fast-path.
## Fix
Drop the inline series and route the whole domain through the existing
`asinf`/`acosf` extern. The explicit out-of-range `Select` is removed as
well; NaN for `|x| > 1` now comes from libm itself. Max error over
`x ∈ [-1, 1]` drops to **2.4e-7** (ULP-grade).
## Tests
- Re-enable `Asin`/`Acos` in `test_unary` (they were commented out with
a TODO about Taylor precision loss).
- Existing `test_asin_acos_boundary_values` (#18582) still passes.
If the inline polynomial was intentional for some target/path, please
flag it — I'll restore it with corrected coefficients instead.
`Atan` is still disabled; that's a separate `x² + 1` overflow bug
(#19560).
Fixes #19563.
---
src/target/llvm/intrin_rule_llvm.cc | 47 ++------------------------------
tests/python/relax/test_frontend_onnx.py | 6 ++--
2 files changed, 5 insertions(+), 48 deletions(-)
diff --git a/src/target/llvm/intrin_rule_llvm.cc
b/src/target/llvm/intrin_rule_llvm.cc
index 3244deab87..ae57e8d9a6 100644
--- a/src/target/llvm/intrin_rule_llvm.cc
+++ b/src/target/llvm/intrin_rule_llvm.cc
@@ -173,61 +173,18 @@ TVM_REGISTER_OP("tirx.sinh")
TVM_REGISTER_OP("tirx.asin")
.set_attr<FLegalize>("llvm.FLegalize", [](const PrimExpr& e) -> PrimExpr {
- using tirx::make_const;
using namespace intrin;
const tirx::CallNode* call = e.as<tirx::CallNode>();
TVM_FFI_ICHECK(call != nullptr);
- const PrimExpr& x = call->args[0];
-
- PrimExpr threshold = make_const(x.dtype(), 0.5);
- PrimExpr abs_x = tvm::abs(x);
- PrimExpr use_lib = abs_x >= threshold;
-
- PrimExpr x2 = x * x;
- PrimExpr term1 = x;
- PrimExpr term3 = term1 * x2 / make_const(x.dtype(), 6);
- PrimExpr term5 = term3 * x2 * make_const(x.dtype(), 9) /
make_const(x.dtype(), 40);
- PrimExpr term7 = term5 * x2 * make_const(x.dtype(), 25) /
make_const(x.dtype(), 112);
- PrimExpr term9 = term7 * x2 * make_const(x.dtype(), 1225) /
make_const(x.dtype(), 3456);
- PrimExpr term11 = term9 * x2 * make_const(x.dtype(), 3969) /
make_const(x.dtype(), 28160);
- PrimExpr series = term1 + term3 + term5 + term7 + term9 + term11;
-
- PrimExpr lib_result =
-
::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e);
-
- PrimExpr lower = make_const(x.dtype(), -1.0);
- PrimExpr upper = make_const(x.dtype(), 1.0);
- PrimExpr out_range = tirx::Or(x<lower, x> upper);
- PrimExpr nan_const = make_const(x.dtype(),
std::numeric_limits<double>::quiet_NaN());
-
- return tirx::Select(out_range, nan_const, tirx::Select(use_lib,
lib_result, series));
+ return ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e);
});
TVM_REGISTER_OP("tirx.acos")
.set_attr<FLegalize>("llvm.FLegalize", [](const PrimExpr& e) -> PrimExpr {
- using tirx::make_const;
using namespace intrin;
const tirx::CallNode* call = e.as<tirx::CallNode>();
TVM_FFI_ICHECK(call != nullptr) << "Invalid call node in acos
legalization";
- const PrimExpr& x = call->args[0];
-
- PrimExpr threshold = make_const(x.dtype(), 0.5);
- PrimExpr abs_x = tvm::abs(x);
- PrimExpr use_lib = abs_x >= threshold;
-
- PrimExpr half_pi = make_const(x.dtype(), M_PI / 2);
- PrimExpr asin_x = asin(x);
- PrimExpr formula_result = half_pi - asin_x;
-
- PrimExpr lib_result =
-
::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e);
-
- PrimExpr lower = make_const(x.dtype(), -1.0);
- PrimExpr upper = make_const(x.dtype(), 1.0);
- PrimExpr out_range = tirx::Or(x<lower, x> upper);
- PrimExpr nan_const = make_const(x.dtype(),
std::numeric_limits<double>::quiet_NaN());
-
- return tirx::Select(out_range, nan_const, tirx::Select(use_lib,
lib_result, formula_result));
+ return ::tvm::codegen::intrin::DispatchPureExtern<::tvm::codegen::intrin::FloatSuffix>(e);
});
TVM_REGISTER_OP("tirx.atan")
diff --git a/tests/python/relax/test_frontend_onnx.py
b/tests/python/relax/test_frontend_onnx.py
index 26daeff46d..d73ec5bae5 100644
--- a/tests/python/relax/test_frontend_onnx.py
+++ b/tests/python/relax/test_frontend_onnx.py
@@ -724,9 +724,9 @@ def test_bitwise_shift(direction: str):
"Sinh",
"Cosh",
"Tanh",
- # "Asin", // TODO @jikechao, fix the precision loss due to the Taylor approximation
- # "Acos",
- # "Atan",
+ "Asin",
+ "Acos",
+ # "Atan", // TODO: fix x²+1 overflow in llvm legalize for huge inputs (issue #19560)
"Asinh",
"Acosh",
"Atanh",