Github user srowen commented on a diff in the pull request:
https://github.com/apache/spark/pull/21109#discussion_r193734550
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
---
@@ -131,13 +135,100 @@ object ExtractEquiJoinKeys extends Logging with
PredicateHelper {
if (joinKeys.nonEmpty) {
val (leftKeys, rightKeys) = joinKeys.unzip
- logDebug(s"leftKeys:$leftKeys | rightKeys:$rightKeys")
- Some((joinType, leftKeys, rightKeys,
otherPredicates.reduceOption(And), left, right))
+ // Find any simple range expressions between two columns
+ // (and involving only those two columns) of the two tables being
joined,
+ // which are not used in the equijoin expressions,
+ // and which can be used for secondary sort optimizations.
+ // rangePreds will contain the original expressions to be filtered
out later.
+ val rangePreds: mutable.Set[Expression] = mutable.Set.empty
+ var rangeConditions: Seq[BinaryComparison] =
+ if (SQLConf.get.useSmjInnerRangeOptimization) {
+ otherPredicates.flatMap {
+ case p@LessThan(l, r) => checkRangeConditions(l, r, left,
right, joinKeys).map {
+ case true => rangePreds.add(p); GreaterThan(r, l)
+ case false => rangePreds.add(p); p
+ }
+ case p@LessThanOrEqual(l, r) =>
+ checkRangeConditions(l, r, left, right, joinKeys).map {
+ case true => rangePreds.add(p); GreaterThanOrEqual(r, l)
+ case false => rangePreds.add(p); p
+ }
+ case p@GreaterThan(l, r) => checkRangeConditions(l, r, left,
right, joinKeys).map {
+ case true => rangePreds.add(p); LessThan(r, l)
+ case false => rangePreds.add(p); p
+ }
+ case p@GreaterThanOrEqual(l, r) =>
+ checkRangeConditions(l, r, left, right, joinKeys).map {
+ case true => rangePreds.add(p); LessThanOrEqual(r, l)
+ case false => rangePreds.add(p); p
+ }
+ case _ => None
+ }
+ } else {
+ Nil
+ }
+
+ // Only using secondary join optimization when both lower and
upper conditions
+ // are specified (e.g. t1.a < t2.b + x and t1.a > t2.b - x)
+ if(rangeConditions.size != 2 ||
+ // Looking for one < and one > comparison:
+ rangeConditions.filter(x => x.isInstanceOf[LessThan] ||
--- End diff --
Instead of filtering and then checking `.size == 0`, how about something like
`rangeConditions.forall(... not instance of either ...)`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]