This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 55bc99fb6 Fix: map_from_arrays() with NULL inputs causes native crash (#3356)
55bc99fb6 is described below
commit 55bc99fb650cc4af0d0dbb974c6766a5f5187de2
Author: Kazantsev Maksim <[email protected]>
AuthorDate: Tue Mar 17 19:54:09 2026 +0400
Fix: map_from_arrays() with NULL inputs causes native crash (#3356)
---
.../main/scala/org/apache/comet/serde/maps.scala | 34 ++++++++++++++++---
.../sql-tests/expressions/map/map_from_arrays.sql | 39 ++++++++++++++++++----
2 files changed, 63 insertions(+), 10 deletions(-)
diff --git a/spark/src/main/scala/org/apache/comet/serde/maps.scala b/spark/src/main/scala/org/apache/comet/serde/maps.scala
index 34e76215f..ceafc157c 100644
--- a/spark/src/main/scala/org/apache/comet/serde/maps.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/maps.scala
@@ -22,7 +22,7 @@ package org.apache.comet.serde
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._
-import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType}
+import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto}
object CometMapKeys extends CometExpressionSerde[MapKeys] {
@@ -84,9 +84,35 @@ object CometMapFromArrays extends CometExpressionSerde[MapFromArrays] {
val keyType = expr.left.dataType.asInstanceOf[ArrayType].elementType
val valueType = expr.right.dataType.asInstanceOf[ArrayType].elementType
val returnType = MapType(keyType = keyType, valueType = valueType)
- val mapFromArraysExpr =
-      scalarFunctionExprToProtoWithReturnType("map", returnType, false, keysExpr, valuesExpr)
- optExprWithInfo(mapFromArraysExpr, expr, expr.children: _*)
+ for {
+ andBinaryExprProto <- createAndBinaryExpr(expr, inputs, binding)
+      mapFromArraysExprProto <- scalarFunctionExprToProto("map", keysExpr, valuesExpr)
+      nullLiteralExprProto <- exprToProtoInternal(Literal(null, returnType), inputs, binding)
+ } yield {
+ val caseWhenExprProto = ExprOuterClass.CaseWhen
+ .newBuilder()
+ .addWhen(andBinaryExprProto)
+ .addThen(mapFromArraysExprProto)
+ .setElseExpr(nullLiteralExprProto)
+ .build()
+ ExprOuterClass.Expr
+ .newBuilder()
+ .setCaseWhen(caseWhenExprProto)
+ .build()
+ }
+ }
+
+ private def createAndBinaryExpr(
+ expr: MapFromArrays,
+ inputs: Seq[Attribute],
+ binding: Boolean): Option[ExprOuterClass.Expr] = {
+ createBinaryExpr(
+ expr,
+ IsNotNull(expr.left),
+ IsNotNull(expr.right),
+ inputs,
+ binding,
+ (builder, binaryExpr) => builder.setAnd(binaryExpr))
}
}
diff --git a/spark/src/test/resources/sql-tests/expressions/map/map_from_arrays.sql b/spark/src/test/resources/sql-tests/expressions/map/map_from_arrays.sql
index 5d6ac3d55..3016eb5ff 100644
--- a/spark/src/test/resources/sql-tests/expressions/map/map_from_arrays.sql
+++ b/spark/src/test/resources/sql-tests/expressions/map/map_from_arrays.sql
@@ -21,16 +21,43 @@ statement
CREATE TABLE test_map_from_arrays(k array<string>, v array<int>) USING parquet
statement
-INSERT INTO test_map_from_arrays VALUES (array('a', 'b', 'c'), array(1, 2, 3)), (array(), array()), (NULL, NULL)
+INSERT INTO test_map_from_arrays VALUES
+ (array('a', 'b', 'c'), array(1, 2, 3)),
+ (array(), array()),
+ (NULL, NULL),
+ (array('x'), NULL),
+ (NULL, array(99))
+-- basic functionality
query spark_answer_only
-SELECT map_from_arrays(k, v) FROM test_map_from_arrays WHERE k IS NOT NULL
+SELECT map_from_arrays(k, v) FROM test_map_from_arrays WHERE k IS NOT NULL AND v IS NOT NULL
--- Comet bug: map_from_arrays(NULL, NULL) causes native crash "map key cannot be null"
--- https://github.com/apache/datafusion-comet/issues/3327
-query ignore(https://github.com/apache/datafusion-comet/issues/3327)
-SELECT map_from_arrays(k, v) FROM test_map_from_arrays WHERE k IS NULL
+-- both inputs NULL should return NULL
+query
+SELECT map_from_arrays(k, v) FROM test_map_from_arrays WHERE k IS NULL AND v IS NULL
+
+-- keys not null but values null should return NULL (Spark behavior)
+query
+SELECT map_from_arrays(k, v) FROM test_map_from_arrays WHERE k IS NOT NULL AND v IS NULL
+
+-- keys null but values not null should return NULL (Spark behavior)
+query
+SELECT map_from_arrays(k, v) FROM test_map_from_arrays WHERE k IS NULL AND v IS NOT NULL
+
+-- all rows including nulls
+query spark_answer_only
+SELECT map_from_arrays(k, v) FROM test_map_from_arrays
-- literal arguments
query spark_answer_only
SELECT map_from_arrays(array('a', 'b'), array(1, 2))
+
+-- literal null arguments
+query
+SELECT map_from_arrays(NULL, array(1, 2))
+
+query
+SELECT map_from_arrays(array('a'), NULL)
+
+query
+SELECT map_from_arrays(NULL, NULL)
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]