Github user maropu commented on a diff in the pull request:
https://github.com/apache/spark/pull/20345#discussion_r198341155
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala
---
@@ -84,19 +84,51 @@ object ReorderJoin extends Rule[LogicalPlan] with
PredicateHelper {
}
}
+ // Extract a list of logical plans to be joined for join-order
comparisons.
+ // Since `ExtractFiltersAndInnerJoins` handles left-deep trees only,
this function uses
+ // the same strategy to extract the plan list.
+ private[optimizer] def extractLeftDeepInnerJoins(plan: LogicalPlan)
+ : Seq[LogicalPlan] = plan match {
+ case j @ Join(left, right, _: InnerLike, _) => right +:
extractLeftDeepInnerJoins(left)
+ case Filter(_, child) => extractLeftDeepInnerJoins(child)
+ case Project(_, child) => extractLeftDeepInnerJoins(child)
+ case _ => Seq(plan)
+ }
+
+ private def sameJoinOrder(plan1: LogicalPlan, plan2: LogicalPlan):
Boolean = {
+ extractLeftDeepInnerJoins(plan1) == extractLeftDeepInnerJoins(plan2)
+ }
+
+ private def mayCreateOrderedJoin(
+ originalPlan: LogicalPlan,
+ input: Seq[(LogicalPlan, InnerLike)],
+ conditions: Seq[Expression]): LogicalPlan = {
+ val orderedJoins = createOrderedJoin(input, conditions)
+ if (!sameJoinOrder(orderedJoins, originalPlan)) {
+ if (originalPlan.output != orderedJoins.output) {
+ // Keep the same output attributes and the order
+ Project(originalPlan.output, orderedJoins)
+ } else {
+ orderedJoins
+ }
+ } else {
+ originalPlan
+ }
+ }
+
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
- case ExtractFiltersAndInnerJoins(input, conditions)
+ case p @ ExtractFiltersAndInnerJoins(input, conditions)
if input.size > 2 && conditions.nonEmpty =>
if (SQLConf.get.starSchemaDetection && !SQLConf.get.cboEnabled) {
val starJoinPlan = StarSchemaDetection.reorderStarJoins(input,
conditions)
if (starJoinPlan.nonEmpty) {
val rest = input.filterNot(starJoinPlan.contains(_))
- createOrderedJoin(starJoinPlan ++ rest, conditions)
+ mayCreateOrderedJoin(p, starJoinPlan ++ rest, conditions)
} else {
- createOrderedJoin(input, conditions)
+ mayCreateOrderedJoin(p, input, conditions)
}
} else {
- createOrderedJoin(input, conditions)
+ mayCreateOrderedJoin(p, input, conditions)
}
--- End diff --
OK, I'll brush up the code based on the suggestion.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]