This is an automated email from the ASF dual-hosted git repository.

kazuyukitanimura pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 6f9b56ab Use IfExpr to check when input to log2 is <=0 and return null 
(#506)
6f9b56ab is described below

commit 6f9b56abe8fa9eaa127ccce1f76c083185dbd060
Author: Pedro M Duarte <[email protected]>
AuthorDate: Mon Jul 15 19:18:05 2024 -0400

    Use IfExpr to check when input to log2 is <=0 and return null (#506)
    
    ## Which issue does this PR close?
    
    Closes #485
    
    ## Rationale for this change
    
    Compatibility with how Spark handles logarithms of values <= 0.
    
    ## What changes are included in this PR?
    
    Use IfExpr to check when input to log2 is <=0 and return null.  This is 
done to match Spark's behavior, which in turn is implemented to match Hive's 
behavior.
    
    ## How are these changes tested?
    
    The existing test for `ln`, `log2` and `log10` was modified so that it 
includes negative numbers as part of the inputs being tested.
---
 docs/source/user-guide/expressions.md                      |  6 +++---
 .../main/scala/org/apache/comet/serde/QueryPlanSerde.scala | 14 +++++++++++---
 .../test/scala/org/apache/comet/CometExpressionSuite.scala |  2 +-
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/docs/source/user-guide/expressions.md 
b/docs/source/user-guide/expressions.md
index 07323382..e44f6731 100644
--- a/docs/source/user-guide/expressions.md
+++ b/docs/source/user-guide/expressions.md
@@ -120,9 +120,9 @@ The following Spark expressions are currently available. 
Any known compatibility
 | Exp        |                                                                 
    |
 | Floor      |                                                                 
    |
 | IsNaN      |                                                                 
    |
-| Log        | log(0) will produce `-Infinity` unlike Spark which returns 
`null`   |
-| Log2       | log2(0) will produce `-Infinity` unlike Spark which returns 
`null`  |
-| Log10      | log10(0) will produce `-Infinity` unlike Spark which returns 
`null` |
+| Log        |                                                                 
    |
+| Log2       |                                                                 
    |
+| Log10      |                                                                 
    |
 | Pow        |                                                                 
    |
 | Round      |                                                                 
    |
 | Signum     | Signum does not differentiate between `0.0` and `-0.0`          
    |
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala 
b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index da534b02..b417239d 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -1703,18 +1703,21 @@ object QueryPlanSerde extends Logging with 
ShimQueryPlanSerde with CometExprShim
               optExprWithInfo(optExpr, expr, child)
           }
 
+        // The expression for `log` functions is defined as null on numbers 
less than or equal
+        // to 0. This matches Spark and Hive behavior, where non positive 
values eval to null
+        // instead of NaN or -Infinity.
         case Log(child) =>
-          val childExpr = exprToProtoInternal(child, inputs)
+          val childExpr = exprToProtoInternal(nullIfNegative(child), inputs)
           val optExpr = scalarExprToProto("ln", childExpr)
           optExprWithInfo(optExpr, expr, child)
 
         case Log10(child) =>
-          val childExpr = exprToProtoInternal(child, inputs)
+          val childExpr = exprToProtoInternal(nullIfNegative(child), inputs)
           val optExpr = scalarExprToProto("log10", childExpr)
           optExprWithInfo(optExpr, expr, child)
 
         case Log2(child) =>
-          val childExpr = exprToProtoInternal(child, inputs)
+          val childExpr = exprToProtoInternal(nullIfNegative(child), inputs)
           val optExpr = scalarExprToProto("log2", childExpr)
           optExprWithInfo(optExpr, expr, child)
 
@@ -2393,6 +2396,11 @@ object QueryPlanSerde extends Logging with 
ShimQueryPlanSerde with CometExprShim
     expression
   }
 
+  def nullIfNegative(expression: Expression): Expression = {
+    val zero = Literal.default(expression.dataType)
+    If(LessThanOrEqual(expression, zero), Literal.create(null, 
expression.dataType), expression)
+  }
+
   /**
    * Returns true if given datatype is supported as a key in DataFusion sort 
merge join.
    */
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala 
b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index cb2069ed..65217767 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -819,7 +819,7 @@ class CometExpressionSuite extends CometTestBase with 
AdaptiveSparkPlanHelper {
     Seq("true", "false").foreach { dictionary =>
       withSQLConf("parquet.enable.dictionary" -> dictionary) {
         withParquetTable(
-          (0 until 5).map(i => (i.toDouble + 0.3, i.toDouble + 0.8)),
+          (-5 until 5).map(i => (i.toDouble + 0.3, i.toDouble + 0.8)),
           "tbl",
           withDictionary = dictionary.toBoolean) {
           checkSparkAnswerWithTol(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to