AngersZhuuuu commented on code in PR #46263:
URL: https://github.com/apache/spark/pull/46263#discussion_r1590728816
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala:
##########
@@ -129,17 +136,36 @@ object InjectRuntimeFilter extends Rule[LogicalPlan] with
PredicateHelper with J
if (left.output.exists(_.semanticEquals(targetKey))) {
extract(left, AttributeSet.empty, hasHitFilter = false,
hasHitSelectiveFilter = false,
currentPlan = left, targetKey = targetKey).orElse {
- // We can also extract from the right side if the join keys are
transitive.
- lkeys.zip(rkeys).find(_._1.semanticEquals(targetKey)).map(_._2)
- .flatMap { newTargetKey =>
- extract(right, AttributeSet.empty,
- hasHitFilter = false, hasHitSelectiveFilter = false,
currentPlan = right,
- targetKey = newTargetKey)
- }
+ // For the exact join key match, like the left table here, it's
always OK to generate
+ // the runtime filter using this left table, no matter what the
join type is.
+ // This is because left table always produce a superset of output
of the join output
+ // regarding the left keys.
+ // For transitive join key match, it's different. The right table
here does
+ // not always generate a superset output regarding left keys.
+ // Let's look at an example
+ // left table: 1, 2, 3
+ // right table, 3, 4
+ // left outer join output: (1, null), (2, null), (3, 3)
+ // left keys: 1, 2, 3
+ // So we can't use right table to generate runtime filter.
+ if (canExtractRight(joinType)) {
+ lkeys.zip(rkeys).find(_._1.semanticEquals(targetKey)).map(_._2)
+ .flatMap { newTargetKey =>
+ extract(right, AttributeSet.empty,
+ hasHitFilter = false, hasHitSelectiveFilter = false,
currentPlan = right,
+ targetKey = newTargetKey)
+ }
+ } else {
+ None
+ }
}
} else if (right.output.exists(_.semanticEquals(targetKey))) {
- extract(right, AttributeSet.empty, hasHitFilter = false,
hasHitSelectiveFilter = false,
- currentPlan = right, targetKey = targetKey).orElse {
+ if (canExtractRight(joinType)) {
Review Comment:
Like current ?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]