ueshin commented on a change in pull request #34053:
URL: https://github.com/apache/spark/pull/34053#discussion_r715072689
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
##########
@@ -2122,6 +2125,68 @@ object RewriteIntersectAll extends Rule[LogicalPlan] {
}
}
+/**
+ * Replaces the logical [[AsOfJoin]] operator with a combination of Join and Aggregate operators.
+ *
+ * Input Pseudo-Query:
+ * {{{
+ * SELECT * FROM left ASOF JOIN right ON (condition, as_of on(left.t, right.t), tolerance)
+ * }}}
+ *
+ * Rewritten Query:
+ * {{{
+ * SELECT left.*, __right__.*
+ * FROM (
+ * SELECT
+ * left.*,
+ * (
+ * SELECT MIN_BY(STRUCT(right.*), left.t - right.t)
+ * FROM right
+ * WHERE condition AND left.t >= right.t AND right.t >= left.t - tolerance
+ * ) as __right__
+ * FROM left
+ * )
+ * }}}
+ */
+object RewriteAsOfJoin extends Rule[LogicalPlan] {
+ def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning(
+ _.containsPattern(AS_OF_JOIN), ruleId) {
+ case AsOfJoin(left, right, asOfCondition, condition, orderExpression,
joinType) =>
+ val conditionWithOuterReference =
+ condition.map(And(_,
asOfCondition)).getOrElse(asOfCondition).transformUp {
+ case a: AttributeReference if left.outputSet.contains(a) =>
+ OuterReference(a)
+ }
+ val filtered = Filter(conditionWithOuterReference, right)
+
+ val orderExpressionWithOuterReference = orderExpression.transformUp {
+ case a: AttributeReference if left.outputSet.contains(a) =>
+ OuterReference(a)
+ }
+ val rightStruct = CreateStruct(right.output)
+ val nearestRight = MinBy(rightStruct, orderExpressionWithOuterReference)
+ .toAggregateExpression()
+ val aggExpr = Alias(nearestRight, "__nearest_right__")()
+ val aggregate = Aggregate(Seq.empty, Seq(aggExpr), filtered)
+
+ val scalarSubquery = Project(
Review comment:
Sure, updated.
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
##########
@@ -2122,6 +2125,68 @@ object RewriteIntersectAll extends Rule[LogicalPlan] {
}
}
+/**
+ * Replaces the logical [[AsOfJoin]] operator with a combination of Join and Aggregate operators.
+ *
+ * Input Pseudo-Query:
+ * {{{
+ * SELECT * FROM left ASOF JOIN right ON (condition, as_of on(left.t, right.t), tolerance)
+ * }}}
+ *
+ * Rewritten Query:
+ * {{{
+ * SELECT left.*, __right__.*
+ * FROM (
+ * SELECT
+ * left.*,
+ * (
+ * SELECT MIN_BY(STRUCT(right.*), left.t - right.t)
+ * FROM right
+ * WHERE condition AND left.t >= right.t AND right.t >= left.t - tolerance
+ * ) as __right__
+ * FROM left
+ * )
+ * }}}
+ */
+object RewriteAsOfJoin extends Rule[LogicalPlan] {
Review comment:
Sure, moved it to a new file.
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
##########
@@ -185,6 +185,8 @@ abstract class Optimizer(catalogManager: CatalogManager)
// PropagateEmptyRelation can change the nullability of an attribute from nullable to
// non-nullable when an empty relation child of a Union is removed
UpdateAttributeNullability) ::
+ Batch("Rewrite with Correlated Expressions", Once,
+ RewriteAsOfJoin) ::
Review comment:
Sounds good. Let me try and move it there.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]