dilipbiswal commented on a change in pull request #24271: [SPARK-27342][SQL]
Optimize Limit 0 queries
URL: https://github.com/apache/spark/pull/24271#discussion_r272068261
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
##########
@@ -1711,3 +1713,37 @@ object RemoveRepetitionFromGroupExpressions extends
Rule[LogicalPlan] {
}
}
}
+
+/**
+ * Replaces GlobalLimit 0 and LocalLimit 0 nodes (subtree) with empty Local
Relation, as they don't
+ * return any rows.
+ */
+object OptimizeLimitZero extends Rule[LogicalPlan] {
+ // returns empty Local Relation corresponding to given plan
+ private def empty(plan: LogicalPlan) =
+ LocalRelation(plan.output, data = Seq.empty, isStreaming =
plan.isStreaming)
+
+ def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
+ // Nodes below GlobalLimit or LocalLimit can be pruned if the limit value
is zero (0).
+ // Any subtree in the logical plan that has GlobalLimit 0 or LocalLimit 0
as its root is
+ // semantically equivalent to an empty relation.
+ //
+ // In such cases, the effects of Limit 0 can be propagated through the
Logical Plan by replacing
+ // the (Global/Local) Limit subtree with an empty LocalRelation, thereby
pruning the subtree
+ // below and triggering other optimization rules of PropagateEmptyRelation
to propagate the
+ // changes up the Logical Plan.
+ //
+ // Replace Global Limit 0 nodes with empty Local Relation
+ case gl @ GlobalLimit(IntegerLiteral(limit), _) if limit == 0 =>
+ empty(gl)
+
+ // Note: For all SQL queries, if a LocalLimit 0 node exists in the Logical
Plan, then a
+ // GlobalLimit 0 node would also exist. Thus, the above case would be
sufficient to handle
+ // almost all cases. However, if a user explicitly creates a Logical Plan
with LocalLimit 0 node
+ // then the following rule will handle that case as well.
+ //
+ // Replace Local Limit 0 nodes with empty Local Relation
+ case ll @ LocalLimit(IntegerLiteral(limit), _) if limit == 0 =>
Review comment:
same as above..
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]