leanken commented on a change in pull request #29455:
URL: https://github.com/apache/spark/pull/29455#discussion_r472592607
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/EliminateNullAwareAntiJoin.scala
##########
@@ -20,22 +20,50 @@ package org.apache.spark.sql.execution.adaptive
import
org.apache.spark.sql.catalyst.planning.ExtractSingleColumnNullAwareAntiJoin
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
import org.apache.spark.sql.execution.joins.EmptyHashedRelationWithAllNullKeys
+import
org.apache.spark.sql.execution.metric.SQLShuffleWriteMetricsReporter.SHUFFLE_RECORDS_WRITTEN
/**
- * This optimization rule detects and convert a NAAJ to an Empty LocalRelation
- * when buildSide is EmptyHashedRelationWithAllNullKeys.
+ * This optimization rule try to Eliminate NAAJ by following patterns.
+ * 1. Convert a NAAJ to an Empty LocalRelation when buildSide is
EmptyHashedRelationWithAllNullKeys.
+ * 2. Convert a NAAJ to left plan when buildSide is empty.
*/
object EliminateNullAwareAntiJoin extends Rule[LogicalPlan] {
- private def canEliminate(plan: LogicalPlan): Boolean = plan match {
+ private def canConvertToEmptyLocalRelation(plan: LogicalPlan): Boolean =
plan match {
case LogicalQueryStage(_, stage: BroadcastQueryStageExec) if
stage.resultOption.get().isDefined
&& stage.broadcast.relationFuture.get().value ==
EmptyHashedRelationWithAllNullKeys => true
+ case LogicalQueryStage(_, stage: ShuffleQueryStageExec) if
stage.resultOption.get().isDefined =>
+ // Equivalent to EmptyHashedRelationWithAllNullKeys
+ val nullPartitionKeyMetrics =
stage.shuffle.asInstanceOf[ShuffleExchangeExec]
+ .metrics.get(ShuffleExchangeExec.NULL_PARTITION_KEY_RECORDS_WRITTEN)
+ if (nullPartitionKeyMetrics.isDefined &&
nullPartitionKeyMetrics.get.value > 0L) {
+ true
+ } else {
+ false
+ }
+ case _ => false
+ }
+
+ private def canConvertToLeftPlan(plan: LogicalPlan): Boolean = plan match {
Review comment:
Both canConvertXXX methods exist to remove the AntiJoin, so maybe
canRemoveAntiJoin is not the best option.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]