[
https://issues.apache.org/jira/browse/CALCITE-6400?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17845845#comment-17845845
]
hongyu guo commented on CALCITE-6400:
-------------------------------------
This summary is confusing: without reading your code, I can't tell whether
`map_entries` should or should not treat `null` as a legal map key.
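For example, here is a minimal spark-shell sketch of the two possible readings
of "null" (not run here; the expected results are assumptions based on the
NULL_MAP_KEY behavior shown in the trace below):
{code:java}
// Reading 1: null as a KEY inside the map.
// Spark rejects this while building the map literal (see the CreateMap
// frame in the trace below), before map_entries is ever evaluated.
scala> spark.sql("select map_entries(map('foo', 1, null, 2.0))").show()
// => org.apache.spark.SparkRuntimeException: [NULL_MAP_KEY] Cannot use null as map key.

// Reading 2 (assumption): the map itself has no entries.
// This is legal; map_entries simply returns an empty array.
scala> spark.sql("select map_entries(map())").show()
// => one row containing [] (no error)
{code}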
> MAP_ENTRIES allows null as a map key
> ------------------------------------
>
> Key: CALCITE-6400
> URL: https://issues.apache.org/jira/browse/CALCITE-6400
> Project: Calcite
> Issue Type: Bug
> Components: core
> Affects Versions: 1.36.0
> Reporter: Caican Cai
> Assignee: Caican Cai
> Priority: Major
> Labels: pull-request-available
> Fix For: 1.38.0
>
>
> In Spark, map_entries does not allow null as a map key. The null mentioned
> here is a null key inside the map, not the map itself being null or empty.
> {code:java}
> scala> val df = spark.sql("select map_entries(map('foo', 1, null, 2.0))")
> df: org.apache.spark.sql.DataFrame = [map_entries(map(foo, 1, NULL, 2.0)): array<struct<key:string,value:decimal(11,1)>>]
> scala> df.show()
> org.apache.spark.SparkRuntimeException: [NULL_MAP_KEY] Cannot use null as map key.
> at org.apache.spark.sql.errors.QueryExecutionErrors$.nullAsMapKeyNotAllowedError(QueryExecutionErrors.scala:445)
> at org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder.put(ArrayBasedMapBuilder.scala:56)
> at org.apache.spark.sql.catalyst.expressions.CreateMap.eval(complexTypeCreator.scala:248)
> at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:542)
> at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:542)
> at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.org$apache$spark$sql$catalyst$optimizer$ConstantFolding$$constantFolding(expressions.scala:80)
> at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.$anonfun$constantFolding$4(expressions.scala:90)
> at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1249)
> at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1248)
> at org.apache.spark.sql.catalyst.expressions.UnaryExpression.mapChildren(Expression.scala:532)
> at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.org$apache$spark$sql$catalyst$optimizer$ConstantFolding$$constantFolding(expressions.scala:90)
> at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.$anonfun$applyOrElse$1(expressions.scala:94)
> at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$1(QueryPlan.scala:207)
> at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
> at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:207)
> at org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:218)
> at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$3(QueryPlan.scala:223)
> at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
> at scala.collection.immutable.List.foreach(List.scala:431)
> at scala.collection.TraversableLike.map(TraversableLike.scala:286)
> at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
> at scala.collection.immutable.List.map(List.scala:305)
> at org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:223)
> at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$4(QueryPlan.scala:228)
> at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:355)
> at org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:228)
> at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.applyOrElse(expressions.scala:94)
> at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.applyOrElse(expressions.scala:93)
> at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
> at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
> at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
> at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:31)
> at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
> at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
> at org.apache.spark.sql.catalyst.trees.TreeNode.transformWithPruning(TreeNode.scala:478)
> at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(expressions.scala:93)
> at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(expressions.scala:46)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)
> at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
> at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
> at scala.collection.immutable.List.foldLeft(List.scala:91)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)
> at scala.collection.immutable.List.foreach(List.scala:431)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)
> at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:88)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)
> at org.apache.spark.sql.execution.QueryExecution.$anonfun$optimizedPlan$1(QueryExecution.scala:143)
> at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
> at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
> at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:526)
> at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
> at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
> at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:139)
> at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:135)
> at org.apache.spark.sql.execution.QueryExecution.assertOptimized(QueryExecution.scala:153)
> at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:171)
> at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:168)
> at org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:221)
> at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:266)
> at org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:235)
> at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:112)
> at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:195)
> at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:103)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
> at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
> at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4204)
> at org.apache.spark.sql.Dataset.head(Dataset.scala:3200)
> at org.apache.spark.sql.Dataset.take(Dataset.scala:3421)
> at org.apache.spark.sql.Dataset.getRows(Dataset.scala:283)
> at org.apache.spark.sql.Dataset.showString(Dataset.scala:322)
> at org.apache.spark.sql.Dataset.show(Dataset.scala:807)
> at org.apache.spark.sql.Dataset.show(Dataset.scala:766)
> at org.apache.spark.sql.Dataset.show(Dataset.scala:775)
> ... 47 elided
> {code}