[
https://issues.apache.org/jira/browse/SPARK-49349?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Max Gekk resolved SPARK-49349.
------------------------------
Fix Version/s: 4.0.0
Resolution: Fixed
Issue resolved by pull request 48915
[https://github.com/apache/spark/pull/48915]
> Block LCA with Generate
> -----------------------
>
> Key: SPARK-49349
> URL: https://issues.apache.org/jira/browse/SPARK-49349
> Project: Spark
> Issue Type: Sub-task
> Components: SQL
> Affects Versions: 4.0.0, 3.5.2
> Reporter: Yuming Wang
> Assignee: Yuming Wang
> Priority: Major
> Labels: pull-request-available
> Fix For: 4.0.0
>
>
> How to reproduce:
> {code:scala}
> spark.sql("CREATE TABLE tbl (data_json_array ARRAY<MAP<STRING, STRING>>) USING parquet")
> spark.sql(
>   """
>     |select
>     |  event_dtl ['EventSubType'] event_sub_type,
>     |  from_json(
>     |    event_dtl ['instrumentData'],
>     |    'map<string,string>'
>     |  ) instrument_data_dtl,
>     |  explode(
>     |    map_entries(
>     |      map_filter(
>     |        instrument_data_dtl,
>     |        (k, v) -> substr(v, 1, 1) = '['
>     |      )
>     |    )
>     |  ) as instrument_array_dtl,
>     |  instrument_array_dtl ['key'] instrument_msg_key,
>     |  case
>     |    when instrument_msg_key = 'VALID_MESG' then 'Y'
>     |    else 'N'
>     |  end as message_qualified,
>     |  case
>     |    when instrument_msg_key <> 'VALID_MESG' then instrument_msg_key
>     |  end as disqualification_reason
>     |from tbl lateral view explode(data_json_array) as event_dtl
>     |""".stripMargin).show
> {code}
> Exception:
> {noformat}
> org.apache.spark.sql.catalyst.ExtendedAnalysisException: Invalid call to
> dataType on unresolved object;
> 'Project [event_dtl#7[EventSubType] AS event_sub_type#0,
> from_json(MapType(StringType,StringType,true), event_dtl#7[instrumentData],
> Some(Asia/Shanghai)) AS instrument_data_dtl#1,
> explode(map_entries(map_filter(lateralAliasReference(instrument_data_dtl),
> lambdafunction((substr(lambda v#10, 1, 1) = [), lambda k#9, lambda v#10,
> false)))) AS instrument_array_dtl#2,
> lateralAliasReference(instrument_array_dtl)[key] AS instrument_msg_key#3,
> CASE WHEN (lateralAliasReference(instrument_msg_key) = VALID_MESG) THEN Y
> ELSE N END AS message_qualified#4, CASE WHEN NOT
> (lateralAliasReference(instrument_msg_key) = VALID_MESG) THEN
> lateralAliasReference(instrument_msg_key) END AS disqualification_reason#5]
> +- Generate explode(data_json_array#6), false, as, [event_dtl#7]
> +- SubqueryAlias spark_catalog.default.tbl
> +- Relation spark_catalog.default.tbl[data_json_array#6] parquet
> at
> org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute.dataType(unresolved.scala:227)
> at
> org.apache.spark.sql.catalyst.expressions.LateralColumnAliasReference.dataType(namedExpressions.scala:459)
> at
> org.apache.spark.sql.catalyst.expressions.BinaryOperator.checkInputDataTypes(Expression.scala:775)
> at
> org.apache.spark.sql.catalyst.expressions.BinaryComparison.checkInputDataTypes(predicates.scala:982)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$6(CheckAnalysis.scala:306)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$6$adapted(CheckAnalysis.scala:297)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:244)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:243)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:243)
> at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:243)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:243)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:243)
> at scala.collection.Iterator.foreach(Iterator.scala:943)
> at scala.collection.Iterator.foreach$(Iterator.scala:943)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
> at scala.collection.IterableLike.foreach(IterableLike.scala:74)
> at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
> at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:243)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$5(CheckAnalysis.scala:297)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$5$adapted(CheckAnalysis.scala:297)
> at scala.collection.immutable.Stream.foreach(Stream.scala:533)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$2(CheckAnalysis.scala:297)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$2$adapted(CheckAnalysis.scala:215)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:244)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis0(CheckAnalysis.scala:215)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis0$(CheckAnalysis.scala:197)
> at
> org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis0(Analyzer.scala:202)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:193)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:171)
> at
> org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:202)
> at
> org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:225)
> at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)
> at
> org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:222)
> at
> org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:77)
> at
> org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138)
> at
> org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:219)
> at
> org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)
> at
> org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:219)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
> at
> org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:218)
> at
> org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:77)
> at
> org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:74)
> at
> org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:66)
> at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
> at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
> at org.apache.spark.sql.SparkSession.$anonfun$sql$4(SparkSession.scala:691)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
> at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:682)
> at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:713)
> at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:744)
> ... 72 elided
> {noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]