AssHero commented on code in PR #2823:
URL: https://github.com/apache/arrow-datafusion/pull/2823#discussion_r914353805


##########
datafusion/optimizer/src/eliminate_limit.rs:
##########
@@ -35,35 +39,98 @@ impl EliminateLimit {
     }
 }
 
+/// Ancestor indicates the current ancestor in the LogicalPlan tree
+/// when traversing down related to "eliminate limit".
+enum Ancestor {
+    /// Limit
+    FromLimit { skip: Option<usize> },
+    /// Other nodes that don't affect the adjustment of "Limit"
+    NotRelevant,
+}
+
+/// replaces LIMIT 0 with an [LogicalPlan::EmptyRelation]
+/// replaces LIMIT node whose ancestor LIMIT's skip is greater than or equal 
to current's fetch
+/// with an [LogicalPlan::EmptyRelation]
+fn eliminate_limit(
+    _optimizer: &EliminateLimit,
+    ancestor: &Ancestor,
+    plan: &LogicalPlan,
+    _optimizer_config: &OptimizerConfig,
+) -> Result<LogicalPlan> {
+    match plan {
+        LogicalPlan::Limit(Limit {
+            skip, fetch, input, ..
+        }) => {
+            let ancestor_skip = match ancestor {
+                Ancestor::FromLimit { skip, .. } => skip.unwrap_or(0),
+                _ => 0,
+            };
+            // If ancestor's skip is equal or greater than current's fetch,
+            // replaces with an [LogicalPlan::EmptyRelation].
+            // For such query, the inner query(select * from xxx limit 5) 
should be optimized as an EmptyRelation:
+            // select * from (select * from xxx limit 5) a limit 2 offset 5;
+            match fetch {
+                Some(fetch) => {
+                    if *fetch == 0 || ancestor_skip >= *fetch {
+                        return Ok(LogicalPlan::EmptyRelation(EmptyRelation {
+                            produce_one_row: false,
+                            schema: input.schema().clone(),
+                        }));
+                    }
+                }
+                None => {}
+            }
+
+            let expr = plan.expressions();
+
+            // apply the optimization to all inputs of the plan
+            let inputs = plan.inputs();
+            let new_inputs = inputs
+                .iter()
+                .map(|plan| {
+                    eliminate_limit(
+                        _optimizer,
+                        &Ancestor::FromLimit { skip: *skip },
+                        plan,
+                        _optimizer_config,
+                    )
+                })
+                .collect::<Result<Vec<_>>>()?;
+
+            from_plan(plan, &expr, &new_inputs)
+        }
+        // Rest: recurse and find possible LIMIT 0/Multi LIMIT OFFSET nodes
+        _ => {
+            // For those plans(projection/sort/..) which do not affect the 
output rows of sub-plans, we still use ancestor;
+            // otherwise, use NotRelevant instead.
+            let ancestor = match plan {
+                LogicalPlan::Projection { .. } | LogicalPlan::Sort { .. } => 
ancestor,
+                _ => &Ancestor::NotRelevant,

Review Comment:
   > I wonder if `Ancester::Unknown` would make the intent clearer compared to 
`Ancester::NotRelevant`?
   
   This is consistent with limit_push_down.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to