This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new b038ac531 chore: Respect to legacySizeOfNull option for size function (#3036)
b038ac531 is described below

commit b038ac5314a5b93153ccba12867d7aebb2285954
Author: Kazantsev Maksim <[email protected]>
AuthorDate: Mon Jan 12 10:02:02 2026 -0800

    chore: Respect to legacySizeOfNull option for size function (#3036)
---
 .../main/scala/org/apache/comet/serde/arrays.scala | 37 +++++++++++++++++++---
 .../apache/comet/CometArrayExpressionSuite.scala   | 19 +++++++++++
 2 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala
index 5d989b4a3..b552a071d 100644
--- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala
@@ -546,7 +546,6 @@ object CometArrayFilter extends CometExpressionSerde[ArrayFilter] {
 object CometSize extends CometExpressionSerde[Size] {
 
   override def getSupportLevel(expr: Size): SupportLevel = {
-    // TODO respect spark.sql.legacy.sizeOfNull
     expr.child.dataType match {
       case _: ArrayType => Compatible()
       case _: MapType => Unsupported(Some("size does not support map inputs"))
@@ -554,7 +553,6 @@ object CometSize extends CometExpressionSerde[Size] {
         // this should be unreachable because Spark only supports map and array inputs
         Unsupported(Some(s"Unsupported child data type: $other"))
     }
-
   }
 
   override def convert(
@@ -562,10 +560,41 @@ object CometSize extends CometExpressionSerde[Size] {
       inputs: Seq[Attribute],
       binding: Boolean): Option[ExprOuterClass.Expr] = {
     val arrayExprProto = exprToProto(expr.child, inputs, binding)
+    for {
+      isNotNullExprProto <- createIsNotNullExprProto(expr, inputs, binding)
+      sizeScalarExprProto <- scalarFunctionExprToProto("size", arrayExprProto)
+      emptyLiteralExprProto <- createLiteralExprProto(SQLConf.get.legacySizeOfNull)
+    } yield {
+      val caseWhenExpr = ExprOuterClass.CaseWhen
+        .newBuilder()
+        .addWhen(isNotNullExprProto)
+        .addThen(sizeScalarExprProto)
+        .setElseExpr(emptyLiteralExprProto)
+        .build()
+      ExprOuterClass.Expr
+        .newBuilder()
+        .setCaseWhen(caseWhenExpr)
+        .build()
+    }
+  }
+
+  private def createIsNotNullExprProto(
+      expr: Size,
+      inputs: Seq[Attribute],
+      binding: Boolean): Option[ExprOuterClass.Expr] = {
+    createUnaryExpr(
+      expr,
+      expr.child,
+      inputs,
+      binding,
+      (builder, unaryExpr) => builder.setIsNotNull(unaryExpr))
+  }
 
-    val sizeScalarExpr = scalarFunctionExprToProto("size", arrayExprProto)
-    optExprWithInfo(sizeScalarExpr, expr)
+  private def createLiteralExprProto(legacySizeOfNull: Boolean): Option[ExprOuterClass.Expr] = {
+    val value = if (legacySizeOfNull) -1 else null
+    exprToProto(Literal(value, IntegerType), Seq.empty)
   }
+
 }
 
 trait ArraysBase {
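
[Editor's note] The rewritten convert() above no longer emits a bare scalar call: it wraps the native size function in a CaseWhen so that a non-null array goes through size(), while a null input falls back to -1 or NULL depending on spark.sql.legacy.sizeOfNull. The sketch below is illustrative only (it is not Comet serde code, and the helper name sizeRespectingLegacyConf is hypothetical); it mirrors the same CASE WHEN shape using Spark's public DataFrame API. Spark's built-in size already consults the config internally, so the explicit when/otherwise here exists purely to show the plan structure Comet serializes for the native engine:

    import org.apache.spark.sql.Column
    import org.apache.spark.sql.functions.{lit, size, when}
    import org.apache.spark.sql.internal.SQLConf
    import org.apache.spark.sql.types.IntegerType

    // Hypothetical helper, not part of this commit: mirrors
    // CASE WHEN col IS NOT NULL THEN size(col) ELSE <fallback> END,
    // where <fallback> is -1 under the legacy setting and NULL otherwise.
    def sizeRespectingLegacyConf(col: Column): Column = {
      val nullFallback =
        if (SQLConf.get.legacySizeOfNull) lit(-1) else lit(null).cast(IntegerType)
      when(col.isNotNull, size(col)).otherwise(nullFallback)
    }
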
diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
index 9f908e741..cf4911736 100644
--- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.CometTestBase
 import org.apache.spark.sql.catalyst.expressions.{ArrayAppend, ArrayDistinct, ArrayExcept, ArrayInsert, ArrayIntersect, ArrayJoin, ArrayRepeat, ArraysOverlap, ArrayUnion}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.functions._
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.ArrayType
 
 import org.apache.comet.CometSparkSessionExtensions.{isSpark35Plus, isSpark40Plus}
@@ -871,4 +872,22 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
       }
     }
   }
+
+  test("size - respect to legacySizeOfNull") {
+    val table = "t1"
+    withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_ICEBERG_COMPAT) {
+      withTable(table) {
+        sql(s"create table $table(col array<string>) using parquet")
+        sql(s"insert into $table values(null)")
+        withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") {
+          checkSparkAnswerAndOperator(sql(s"select size(col) from $table"))
+        }
+        withSQLConf(
+          SQLConf.LEGACY_SIZE_OF_NULL.key -> "true",
+          SQLConf.ANSI_ENABLED.key -> "false") {
+          checkSparkAnswerAndOperator(sql(s"select size(col) from $table"))
+        }
+      }
+    }
+  }
 }
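
[Editor's note] The new test exercises both settings of the flag against a table containing a null array. For reference, the expected behaviour follows Spark's documented size() semantics (the concrete result values are not spelled out in the commit itself) and can be reproduced roughly as follows in a spark-shell:

    // spark.sql.legacy.sizeOfNull = false  =>  size(NULL) returns NULL
    spark.conf.set("spark.sql.legacy.sizeOfNull", "false")
    spark.sql("SELECT size(CAST(NULL AS ARRAY<STRING>))").show()

    // spark.sql.legacy.sizeOfNull = true, ANSI mode off  =>  size(NULL) returns -1
    // (ANSI mode is disabled because it overrides the legacy setting, as in the test)
    spark.conf.set("spark.sql.legacy.sizeOfNull", "true")
    spark.conf.set("spark.sql.ansi.enabled", "false")
    spark.sql("SELECT size(CAST(NULL AS ARRAY<STRING>))").show()
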


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
