This is an automated email from the ASF dual-hosted git repository.
kazuyukitanimura pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 6f9b56ab Use IfExpr to check when input to log2 is <=0 and return null
(#506)
6f9b56ab is described below
commit 6f9b56abe8fa9eaa127ccce1f76c083185dbd060
Author: Pedro M Duarte <[email protected]>
AuthorDate: Mon Jul 15 19:18:05 2024 -0400
Use IfExpr to check when input to log2 is <=0 and return null (#506)
## Which issue does this PR close?
Closes #485
## Rationale for this change
Compatibility with how Spark handles logarithms of values <=0.
## What changes are included in this PR?
Use IfExpr to check when input to log2 is <=0 and return null. This is
done to match Spark's behavior, which in turn is implemented to match Hive's
behavior.
## How are these changes tested?
The existing test for `ln`, `log2` and `log10` was modified so that it
includes negative numbers as part of the inputs being tested.
---
docs/source/user-guide/expressions.md | 6 +++---
.../main/scala/org/apache/comet/serde/QueryPlanSerde.scala | 14 +++++++++++---
.../test/scala/org/apache/comet/CometExpressionSuite.scala | 2 +-
3 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/docs/source/user-guide/expressions.md
b/docs/source/user-guide/expressions.md
index 07323382..e44f6731 100644
--- a/docs/source/user-guide/expressions.md
+++ b/docs/source/user-guide/expressions.md
@@ -120,9 +120,9 @@ The following Spark expressions are currently available.
Any known compatibility
| Exp |
|
| Floor |
|
| IsNaN |
|
-| Log | log(0) will produce `-Infinity` unlike Spark which returns
`null` |
-| Log2 | log2(0) will produce `-Infinity` unlike Spark which returns
`null` |
-| Log10 | log10(0) will produce `-Infinity` unlike Spark which returns
`null` |
+| Log |
|
+| Log2 |
|
+| Log10 |
|
| Pow |
|
| Round |
|
| Signum | Signum does not differentiate between `0.0` and `-0.0`
|
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index da534b02..b417239d 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -1703,18 +1703,21 @@ object QueryPlanSerde extends Logging with
ShimQueryPlanSerde with CometExprShim
optExprWithInfo(optExpr, expr, child)
}
+ // The expression for `log` functions is defined as null on numbers
less than or equal
+ // to 0. This matches Spark and Hive behavior, where non positive
values eval to null
+ // instead of NaN or -Infinity.
case Log(child) =>
- val childExpr = exprToProtoInternal(child, inputs)
+ val childExpr = exprToProtoInternal(nullIfNegative(child), inputs)
val optExpr = scalarExprToProto("ln", childExpr)
optExprWithInfo(optExpr, expr, child)
case Log10(child) =>
- val childExpr = exprToProtoInternal(child, inputs)
+ val childExpr = exprToProtoInternal(nullIfNegative(child), inputs)
val optExpr = scalarExprToProto("log10", childExpr)
optExprWithInfo(optExpr, expr, child)
case Log2(child) =>
- val childExpr = exprToProtoInternal(child, inputs)
+ val childExpr = exprToProtoInternal(nullIfNegative(child), inputs)
val optExpr = scalarExprToProto("log2", childExpr)
optExprWithInfo(optExpr, expr, child)
@@ -2393,6 +2396,11 @@ object QueryPlanSerde extends Logging with
ShimQueryPlanSerde with CometExprShim
expression
}
+ def nullIfNegative(expression: Expression): Expression = {
+ val zero = Literal.default(expression.dataType)
+ If(LessThanOrEqual(expression, zero), Literal.create(null,
expression.dataType), expression)
+ }
+
/**
* Returns true if given datatype is supported as a key in DataFusion sort
merge join.
*/
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index cb2069ed..65217767 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -819,7 +819,7 @@ class CometExpressionSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
Seq("true", "false").foreach { dictionary =>
withSQLConf("parquet.enable.dictionary" -> dictionary) {
withParquetTable(
- (0 until 5).map(i => (i.toDouble + 0.3, i.toDouble + 0.8)),
+ (-5 until 5).map(i => (i.toDouble + 0.3, i.toDouble + 0.8)),
"tbl",
withDictionary = dictionary.toBoolean) {
checkSparkAnswerWithTol(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]