[
https://issues.apache.org/jira/browse/SPARK-45943?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17905291#comment-17905291
]
Asif commented on SPARK-45943:
------------------------------
The exception seen is:
[INTERNAL_ERROR] BUG: computeStats called before pushdown on DSv2 relation:
testcat.store_sales SQLSTATE: XX000
org.apache.spark.SparkException: [INTERNAL_ERROR] BUG: computeStats called
before pushdown on DSv2 relation: testcat.store_sales SQLSTATE: XX000
at org.apache.spark.SparkException$.internalError(SparkException.scala:92)
at org.apache.spark.SparkException$.internalError(SparkException.scala:96)
at
org.apache.spark.sql.execution.datasources.v2.DataSourceV2RelationBase.computeStats(DataSourceV2Relation.scala:81)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.default(SizeInBytesOnlyStatsPlanVisitor.scala:56)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.default(SizeInBytesOnlyStatsPlanVisitor.scala:28)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit(LogicalPlanVisitor.scala:49)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit$(LogicalPlanVisitor.scala:25)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visit(SizeInBytesOnlyStatsPlanVisitor.scala:28)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.$anonfun$stats$1(LogicalPlanStats.scala:37)
at scala.Option.getOrElse(Option.scala:201)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats(LogicalPlanStats.scala:33)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats$(LogicalPlanStats.scala:33)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.stats(LogicalPlan.scala:37)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.$anonfun$default$1(SizeInBytesOnlyStatsPlanVisitor.scala:58)
at scala.collection.immutable.Vector1.map(Vector.scala:2141)
at scala.collection.immutable.Vector1.map(Vector.scala:386)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.default(SizeInBytesOnlyStatsPlanVisitor.scala:58)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visitJoin(SizeInBytesOnlyStatsPlanVisitor.scala:124)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visitJoin(SizeInBytesOnlyStatsPlanVisitor.scala:28)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit(LogicalPlanVisitor.scala:35)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit$(LogicalPlanVisitor.scala:25)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visit(SizeInBytesOnlyStatsPlanVisitor.scala:28)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.$anonfun$stats$1(LogicalPlanStats.scala:37)
at scala.Option.getOrElse(Option.scala:201)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats(LogicalPlanStats.scala:33)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats$(LogicalPlanStats.scala:33)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.stats(LogicalPlan.scala:37)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.$anonfun$default$1(SizeInBytesOnlyStatsPlanVisitor.scala:58)
at scala.collection.immutable.Vector1.map(Vector.scala:2141)
at scala.collection.immutable.Vector1.map(Vector.scala:386)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.default(SizeInBytesOnlyStatsPlanVisitor.scala:58)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visitJoin(SizeInBytesOnlyStatsPlanVisitor.scala:124)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visitJoin(SizeInBytesOnlyStatsPlanVisitor.scala:28)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit(LogicalPlanVisitor.scala:35)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit$(LogicalPlanVisitor.scala:25)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visit(SizeInBytesOnlyStatsPlanVisitor.scala:28)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.$anonfun$stats$1(LogicalPlanStats.scala:37)
at scala.Option.getOrElse(Option.scala:201)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats(LogicalPlanStats.scala:33)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats$(LogicalPlanStats.scala:33)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.stats(LogicalPlan.scala:37)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visitUnaryNode(SizeInBytesOnlyStatsPlanVisitor.scala:40)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visitProject(SizeInBytesOnlyStatsPlanVisitor.scala:149)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visitProject(SizeInBytesOnlyStatsPlanVisitor.scala:28)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit(LogicalPlanVisitor.scala:38)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit$(LogicalPlanVisitor.scala:25)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visit(SizeInBytesOnlyStatsPlanVisitor.scala:28)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.$anonfun$stats$1(LogicalPlanStats.scala:37)
at scala.Option.getOrElse(Option.scala:201)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats(LogicalPlanStats.scala:33)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats$(LogicalPlanStats.scala:33)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.stats(LogicalPlan.scala:37)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visitJoin(SizeInBytesOnlyStatsPlanVisitor.scala:115)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visitJoin(SizeInBytesOnlyStatsPlanVisitor.scala:28)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit(LogicalPlanVisitor.scala:35)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit$(LogicalPlanVisitor.scala:25)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visit(SizeInBytesOnlyStatsPlanVisitor.scala:28)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.$anonfun$stats$1(LogicalPlanStats.scala:37)
at scala.Option.getOrElse(Option.scala:201)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats(LogicalPlanStats.scala:33)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats$(LogicalPlanStats.scala:33)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.stats(LogicalPlan.scala:37)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visitUnaryNode(SizeInBytesOnlyStatsPlanVisitor.scala:40)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visitAggregate(SizeInBytesOnlyStatsPlanVisitor.scala:67)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visitAggregate(SizeInBytesOnlyStatsPlanVisitor.scala:28)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit(LogicalPlanVisitor.scala:26)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit$(LogicalPlanVisitor.scala:25)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visit(SizeInBytesOnlyStatsPlanVisitor.scala:28)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.$anonfun$stats$1(LogicalPlanStats.scala:37)
at scala.Option.getOrElse(Option.scala:201)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats(LogicalPlanStats.scala:33)
at
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats$(LogicalPlanStats.scala:33)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.stats(LogicalPlan.scala:37)
at
org.apache.spark.sql.catalyst.optimizer.JoinSelectionHelper.canBroadcastBySize(joins.scala:360)
at
org.apache.spark.sql.catalyst.optimizer.JoinSelectionHelper.canBroadcastBySize$(joins.scala:359)
at
org.apache.spark.sql.catalyst.optimizer.PushDownLeftSemiAntiJoin$.canBroadcastBySize(PushDownLeftSemiAntiJoin.scala:35)
at
org.apache.spark.sql.catalyst.optimizer.JoinSelectionHelper.getBroadcastBuildSide(joins.scala:299)
at
org.apache.spark.sql.catalyst.optimizer.JoinSelectionHelper.getBroadcastBuildSide$(joins.scala:292)
at
org.apache.spark.sql.catalyst.optimizer.PushDownLeftSemiAntiJoin$.getBroadcastBuildSide(PushDownLeftSemiAntiJoin.scala:35)
> DataSourceV2Relation.computeStats throws IllegalStateException in test mode
> ---------------------------------------------------------------------------
>
> Key: SPARK-45943
> URL: https://issues.apache.org/jira/browse/SPARK-45943
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.5.1
> Reporter: Asif
> Assignee: Zhen Wang
> Priority: Major
> Labels: pull-request-available
> Fix For: 4.0.0, 3.5.1
>
>
> This issue surfaces when the new unit test of PR
> [SPARK-45866|https://github.com/apache/spark/pull/43824] is added.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]