Dandandan commented on code in PR #7192:
URL: https://github.com/apache/arrow-datafusion/pull/7192#discussion_r1286317691


##########
datafusion/core/src/physical_optimizer/limit_aggregation.rs:
##########
@@ -0,0 +1,117 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! An optimizer rule that detects aggregate operations that could use a limited bucket count
+
+use crate::physical_optimizer::PhysicalOptimizerRule;
+use crate::physical_plan::aggregates::AggregateExec;
+use crate::physical_plan::sorts::sort::SortExec;
+use crate::physical_plan::ExecutionPlan;
+use datafusion_common::config::ConfigOptions;
+use datafusion_common::tree_node::{Transformed, TreeNode};
+use datafusion_common::Result;
+use datafusion_physical_expr::expressions::Column;
+use std::sync::Arc;
+
+/// An optimizer rule that passes a `limit` hint to aggregations if the whole result is not needed
+pub struct LimitAggregation {}
+
+impl LimitAggregation {
+    /// Create a new `LimitAggregation`
+    pub fn new() -> Self {
+        Self {}
+    }
+
+    fn transform(plan: Arc<dyn ExecutionPlan>) -> Option<Arc<dyn ExecutionPlan>> {
+        let sort = plan.as_any().downcast_ref::<SortExec>()?;
+
+        // TODO: support sorting on multiple fields
+        let children = sort.children();
+        let child = match children.as_slice() {
+            [child] => child.clone(),
+            _ => return None,
+        };
+
+        // TODO: look more than 1 level deep for aggregates (as long as cardinality is preserved)
+        let aggr = (*child).as_any().downcast_ref::<AggregateExec>()?;
+
+        // ensure the sort direction matches aggregate function
+        let (field, desc) = aggr.get_minmax_desc()?;
+        let order = sort.output_ordering()?;
+        let order = match order {
+            [order] => order,
+            _ => return None,
+        };
+        if desc != order.options.descending {
+            return None;
+        }
+
+        // ensure the sort is on the same field as the aggregate output
+        let col = order.expr.as_any().downcast_ref::<Column>()?;
+        if col.name() != field.name() {
+            return None;
+        }
+
+        // We found what we want: clone, copy the limit down, and return modified node
+        let mut new_aggr = AggregateExec::try_new(
+            aggr.mode,
+            aggr.group_by.clone(),
+            aggr.aggr_expr.clone(),
+            aggr.filter_expr.clone(),
+            aggr.order_by_expr.clone(),
+            aggr.input.clone(),
+            aggr.input_schema.clone(),
+        )
+        .ok()?; // TODO: handle error?
+        new_aggr.limit = sort.fetch();
+        let plan = SortExec::new(sort.expr().to_vec(), Arc::new(new_aggr));
+        Some(Arc::new(plan))
+    }
+}
+
+impl Default for LimitAggregation {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl PhysicalOptimizerRule for LimitAggregation {
+    fn optimize(
+        &self,
+        plan: Arc<dyn ExecutionPlan>,
+        _config: &ConfigOptions,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        let plan = plan.transform_down(&|plan| {
+            Ok(
+                if let Some(plan) = LimitAggregation::transform(plan.clone()) {
+                    Transformed::Yes(plan)
+                } else {
+                    Transformed::No(plan)
+                },
+            )
+        })?;
+        Ok(plan)
+    }
+
+    fn name(&self) -> &str {
+        "limit aggregation"

Review Comment:
   ```suggestion
           "LimitAggregation"
   ```
   ?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to