jchen5 commented on code in PR #39759:
URL: https://github.com/apache/spark/pull/39759#discussion_r1093203252
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala:
##########
@@ -255,26 +256,72 @@ object DecorrelateInnerQuery extends PredicateHelper {
* Rewrites a domain join cond so that it can be pushed to the right side of a
* union/intersect/except operator.
*/
- def pushConditionsThroughUnion(
+ def pushDomainConditionsThroughSetOperation(
conditions: Seq[Expression],
- union: Union,
+ setOp: LogicalPlan, // Union or SetOperation
child: LogicalPlan): Seq[Expression] = {
// The output attributes are always equal to the left child's output
- assert(union.output.size == child.output.size)
- val map = AttributeMap(union.output.zip(child.output))
+ assert(setOp.output.size == child.output.size)
+ val map = AttributeMap(setOp.output.zip(child.output))
conditions.map {
// The left hand side is the domain attribute used in the inner query and the right hand side
// is the attribute from the outer query. (See comment above in buildDomainAttrMap.)
// We need to remap the attribute names used in the inner query (left hand side) to account
// for the different names in each union child. We should not remap the attribute names used
// in the outer query.
+ //
+ // Note: the reason we can't just use the original joinCond from when the DomainJoin was
+ // constructed is that constructing the DomainJoins happens much earlier than rewriting the
+ // DomainJoins into actual joins, with many optimization steps in
+ // between, which could change the attributes involved (e.g. CollapseProject).
case EqualNullSafe(left: Attribute, right: Expression) =>
EqualNullSafe(map.getOrElse(left, left), right)
case EqualTo(left: Attribute, right: Expression) =>
Review Comment:
Good point, yeah that should work.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]