This is an automated email from the ASF dual-hosted git repository.
ptoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a5ad1a7c9ed2 [SPARK-55557][SQL] Hyperbolic functions should not
overflow with large inputs
a5ad1a7c9ed2 is described below
commit a5ad1a7c9ed22201fb4be1d8b5fba7f654918424
Author: Marco Gaido <[email protected]>
AuthorDate: Sat Mar 14 11:06:30 2026 +0100
[SPARK-55557][SQL] Hyperbolic functions should not overflow with large
inputs
### What changes were proposed in this pull request?
As mentioned in https://issues.apache.org/jira/browse/SPARK-55557, for
large values (namely, larger than the square root of `Double.MAX_VALUE`) the
`asinh` and `acosh` functions return Infinity, due to overflow. This happens
because of the `x * x` operation in the formula to compute them. However, for
such large numbers, the expression can be simplified. Indeed, `x +/- 1` for
such large numbers has no effect, since the precision is not enough to be
sensitive to this operation. Hence, [...]
### Why are the changes needed?
Current code overflows and returns Infinity for asinh and acosh for large
values.
### Does this PR introduce _any_ user-facing change?
Yes, SQL with asinh and acosh with large values do not return infinity
anymore. For large negative values as input of acosh, `NaN` is returned.
### How was this patch tested?
Added UTs.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #54676 from mgaido91/SPARK-55557.
Authored-by: Marco Gaido <[email protected]>
Signed-off-by: Peter Toth <[email protected]>
---
.../sql/catalyst/expressions/mathExpressions.scala | 40 ++++++++++++++++++----
.../expressions/MathExpressionsSuite.scala | 17 ++++++++-
2 files changed, 49 insertions(+), 8 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index 0643e5fba2f3..9ea38b8bfe91 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -418,10 +418,28 @@ case class Cosh(child: Expression) extends
UnaryMathExpression(math.cosh, "COSH"
since = "3.0.0",
group = "math_funcs")
case class Acosh(child: Expression)
- extends UnaryMathExpression((x: Double) => StrictMath.log(x + math.sqrt(x *
x - 1.0)), "ACOSH") {
+ extends UnaryMathExpression((x: Double) => x match {
+ // in case of large values, the square would lead to Infinity; also, - 1
would be ignored due
+ // to numeric precision. So log(x + sqrt(x * x - 1)) becomes log(2x) =
log(2) + log(x) for
+ // positive values.
+ case x if x >= Math.sqrt(Double.MaxValue) =>
+ StrictMath.log(2) + StrictMath.log(x)
+ case x if x < 1 =>
+ Double.NaN
+ case _ => StrictMath.log(x + math.sqrt(x * x - 1.0)) }, "ACOSH") {
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
- defineCodeGen(ctx, ev,
- c => s"java.lang.StrictMath.log($c + java.lang.Math.sqrt($c * $c -
1.0))")
+ nullSafeCodeGen(ctx, ev, c => {
+ val sm = "java.lang.StrictMath"
+ s"""
+ |if ($c >= ${Math.sqrt(Double.MaxValue)}) {
+ | ${ev.value} = $sm.log($c) + $sm.log(2);
+ |} else if ($c < 1) {
+ | ${ev.value} = java.lang.Double.NaN;
+ |} else {
+ | ${ev.value} = $sm.log($c + java.lang.Math.sqrt($c * $c - 1.0));
+ |}
+ |""".stripMargin
+ })
}
override protected def withNewChildInternal(newChild: Expression): Acosh =
copy(child = newChild)
}
@@ -848,12 +866,20 @@ case class Sinh(child: Expression) extends
UnaryMathExpression(math.sinh, "SINH"
group = "math_funcs")
case class Asinh(child: Expression)
extends UnaryMathExpression((x: Double) => x match {
- case Double.NegativeInfinity => Double.NegativeInfinity
+ // in case of large values, the square would lead to Infinity; also, + 1
would be ignored due
+ // to numeric precision. So log(x + sqrt(x * x + 1)) becomes log(2x) =
log(2) + log(x) for
+ // positive values. Since the function is symmetric, for large values we
can use
+ // signum(x) + log(2|x|)
+ case x if Math.abs(x) >= Math.sqrt(Double.MaxValue) - 1 =>
+ Math.signum(x) * (StrictMath.log(2) + StrictMath.log(Math.abs(x)))
case _ => StrictMath.log(x + math.sqrt(x * x + 1.0)) }, "ASINH") {
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
- defineCodeGen(ctx, ev, c =>
- s"$c == Double.NEGATIVE_INFINITY ? Double.NEGATIVE_INFINITY : " +
- s"java.lang.StrictMath.log($c + java.lang.Math.sqrt($c * $c + 1.0))")
+ defineCodeGen(ctx, ev, c => {
+ val sm = "java.lang.StrictMath"
+ s"$sm.abs($c) >= ${Math.sqrt(Double.MaxValue) - 1} ? " +
+ s"$sm.signum($c) * ($sm.log($sm.abs($c)) + $sm.log(2)) :" +
+ s"$sm.log($c + java.lang.Math.sqrt($c * $c + 1.0))"
+ })
}
override protected def withNewChildInternal(newChild: Expression): Asinh =
copy(child = newChild)
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
index 71787bb12130..f6a40406e668 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
@@ -280,7 +280,9 @@ class MathExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
}
test("acosh") {
- testUnary(Acosh, (x: Double) => StrictMath.log(x + math.sqrt(x * x - 1.0)))
+ def f: (Double) => Double = (x: Double) => StrictMath.log(x + math.sqrt(x
* x - 1.0))
+ testUnary(Acosh, f, (10 to 20).map(_ * 0.1))
+ testUnary(Acosh, f, (-20 to 9).map(_ * 0.1), expectNaN = true)
checkConsistencyBetweenInterpretedAndCodegen(Cosh, DoubleType)
val nullLit = Literal.create(null, NullType)
@@ -1010,4 +1012,17 @@ class MathExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
s"Expression $expr should be context independent foldable")
}
}
+
+ test("SPARK-55557: hyperbolic functions should not overflow with large
inputs") {
+ checkEvaluation(Asinh(Double.MaxValue), 710.4758600739439)
+ checkEvaluation(Asinh(Math.sqrt(Double.MaxValue)), 355.58450362725193)
+ checkEvaluation(Acosh(Double.MaxValue), 710.4758600739439)
+ checkEvaluation(Acosh(Math.sqrt(Double.MaxValue)), 355.58450362725193)
+ checkEvaluation(Asinh(Double.MinValue), -710.4758600739439)
+ checkEvaluation(Asinh(-Math.sqrt(Double.MaxValue)), -355.58450362725193)
+ checkNaN(Acosh(Double.MinValue))
+ checkNaN(Acosh(-Math.sqrt(Double.MaxValue) + 1))
+ checkNaN(Acosh(-Math.sqrt(Double.MaxValue) + 2))
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]