jayzhan211 commented on code in PR #10061: URL: https://github.com/apache/arrow-datafusion/pull/10061#discussion_r1562562510
########## datafusion/core/src/physical_optimizer/convert_first_last.rs: ########## @@ -0,0 +1,456 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_common::Result; +use datafusion_common::{ + config::ConfigOptions, + not_impl_err, + tree_node::{Transformed, TransformedResult, TreeNode}, +}; +use datafusion_physical_expr::expressions::{FirstValue, LastValue}; +use datafusion_physical_expr::{ + equivalence::ProjectionMapping, reverse_order_bys, AggregateExpr, + EquivalenceProperties, LexRequirement, PhysicalSortRequirement, +}; +use datafusion_physical_plan::aggregates::{concat_slices, finer_ordering}; +use datafusion_physical_plan::{ + aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}, + ExecutionPlan, ExecutionPlanProperties, InputOrderMode, +}; +use std::sync::Arc; + +use datafusion_physical_expr::equivalence::collapse_lex_req; +use datafusion_physical_plan::windows::get_ordered_partition_by_indices; + +use super::PhysicalOptimizerRule; + +/// The optimizer rule check the ordering requirements of the aggregate expressions. +/// And convert between FIRST_VALUE and LAST_VALUE if possible. 
+/// For example, If we have an ascending values and we want LastValue from the descending requirement, +/// it is equivalent to FirstValue with the current ascending ordering. +/// +/// The concrete example is that, says we have values c1 with [1, 2, 3], which is an ascending order. +/// If we want LastValue(c1 order by desc), which is the first value of reversed c1 [3, 2, 1], +/// so we can convert the aggregate expression to FirstValue(c1 order by asc), +/// since the current ordering is already satisfied, it saves our time! +#[derive(Default)] +pub struct ConvertFirstLast {} + +impl ConvertFirstLast { + pub fn new() -> Self { + Self::default() + } +} + +impl PhysicalOptimizerRule for ConvertFirstLast { + fn optimize( + &self, + plan: Arc<dyn ExecutionPlan>, + _config: &ConfigOptions, + ) -> Result<Arc<dyn ExecutionPlan>> { + plan.transform_down(&get_common_requirement_of_aggregate_input) + .data() + } + + fn name(&self) -> &str { + "SimpleOrdering" + } + + fn schema_check(&self) -> bool { + true + } +} + +fn get_common_requirement_of_aggregate_input( Review Comment: I recursively descend into the inner children and check whether each one is an AggregateExec; if a child is rewritten, the parent is updated too. I think there is a TreeNode rewrite API that handles this gracefully, but I'm not familiar with it yet. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
