maropu commented on a change in pull request #25260: [SPARK-28520][SQL]
WholeStageCodegen does not work properly for LocalTableScanExec
URL: https://github.com/apache/spark/pull/25260#discussion_r307957499
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
##########
@@ -325,4 +326,68 @@ class WholeStageCodegenSuite extends QueryTest with
SharedSQLContext {
checkAnswer(groupByWithId, Seq(Row(1, 2, 0), Row(1, 2, 0)))
}
}
+
+ test("WholeStageCodegen does not work properly for LocalTableScanExec") {
+ // Case1: LocalTableScanExec is the root of a query plan tree.
+ // In this case, WholeStageCodegenExec should not be inserted
+ // as the direct parent of LocalTableScanExec.
+ val df1 = spark.createDataset(1 to 10).toDF
+ val rootOfExecutedPlan = df1.queryExecution.executedPlan
+
+ // Ensure WholeStageCodegenExec is not inserted and
+ // LocalTableScanExec is still the root.
+ assert(!rootOfExecutedPlan.isInstanceOf[WholeStageCodegenExec],
+ "WholeStageCodegenExec should not be inserted if LocalTableScanExec is
the only plan.")
+ assert(rootOfExecutedPlan.isInstanceOf[LocalTableScanExec],
+ "LocalTableScanExec should be still the root.")
+
+ // Case2: The parent of a LocalTableScanExec supports WholeStageCodegen.
+ // In this case, the LocalTableScanExec should be within a
WholeStageCodegen domain
+ // and no more InputAdapter is inserted as the direct parent of the
LocalTableScanExec.
+ val leftDF = spark.createDataset(1 to 10).toDF
+ val rightDF = spark.createDataset(1 to 10).toDF
+
+ // Force BroadcastHashJoin to be enabled
+ withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key ->
Long.MaxValue.toString) {
+ val joinedDF = leftDF.join(rightDF, leftDF("value") === rightDF("value"))
+ val executedPlan = joinedDF.queryExecution.executedPlan
+
+ // BroadcastHashJoinExec supports WholeStageCodegen and it's the parent
of
+ // LocalTableScanExec so LocalTableScanExec should be within a
WholeStageCodegen domain.
+ assert(
+ executedPlan.find {
+ case WholeStageCodegenExec(
+ BroadcastHashJoinExec(_, _, _, _, _, _: LocalTableScanExec, _)) =>
true
+ case _ => false
+ }.isDefined,
+ "LocalTableScanExec is not within a WholeStageCodegen domain.")
+
+ // No more InputAdapter inserted between LocalTableScanExec and its
parent.
+ assert(
+ executedPlan.find {
+ case InputAdapter(_: LocalTableScanExec, _) => true
+ case _ => false
+ }.isEmpty,
+ "InputAdapter should not be inserted.")
+ }
+
+ // Case3: The parent of a plan of LocalTableScanExec does not support
WholeStageCodegen.
Review comment:
IMO, cases 1 and 2 are enough for this test.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]