Github user wzhfy commented on a diff in the pull request:
https://github.com/apache/spark/pull/20345#discussion_r175725372
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala
---
@@ -84,19 +84,49 @@ object ReorderJoin extends Rule[LogicalPlan] with
PredicateHelper {
}
}
+ // Extract a list of logical plans to be joined for join-order
comparisons.
+ // Since `ExtractFiltersAndInnerJoins` handles left-deep trees only,
this function uses
+ // the same strategy to extract the plan list.
+ private def extractLeftDeepInnerJoins(plan: LogicalPlan):
Seq[LogicalPlan] = plan match {
+ case j @ Join(left, right, _: InnerLike, _) => right +:
extractLeftDeepInnerJoins(left)
+ case p @ Project(_, j @ Join(_, _, _: InnerLike, _)) =>
extractLeftDeepInnerJoins(j)
+ case _ => Seq(plan)
+ }
+
+ private def checkSameJoinOrder(plan1: LogicalPlan, plan2: LogicalPlan):
Boolean = {
+ extractLeftDeepInnerJoins(plan1) == extractLeftDeepInnerJoins(plan2)
+ }
+
+ private def mayCreateOrderedJoin(
+ originalPlan: LogicalPlan,
+ input: Seq[(LogicalPlan, InnerLike)],
+ conditions: Seq[Expression]): LogicalPlan = {
+ val orderedJoins = createOrderedJoin(input, conditions)
+ if (!checkSameJoinOrder(orderedJoins, originalPlan)) {
--- End diff --
Is this check necessary? I think checking `originalPlan.output !=
orderedJoins.output` is enough, and it is faster.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]