adriangb commented on code in PR #20117:
URL: https://github.com/apache/datafusion/pull/20117#discussion_r2760985953


##########
datafusion/optimizer/src/extract_leaf_expressions.rs:
##########
@@ -0,0 +1,1916 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`ExtractLeafExpressions`] extracts `MoveTowardsLeafNodes` sub-expressions into projections.
+//!
+//! This optimizer rule normalizes the plan so that all `MoveTowardsLeafNodes` computations
+//! (like field accessors) live in Projection nodes immediately above scan nodes, making them
+//! eligible for pushdown by the `OptimizeProjections` rule.
+//!
+//! ## Algorithm
+//!
+//! This rule uses **TopDown** traversal with projection merging:
+//!
+//! 1. When encountering a projection with `MoveTowardsLeafNodes` expressions, look at its input
+//! 2. If input is a Projection, **merge** the expressions through it using column replacement
+//! 3. Continue until we hit a barrier node (TableScan, Join, Aggregate)
+//! 4. Idempotency is natural: merged expressions no longer have column refs matching projection outputs
+//!
+//! ### Special Cases
+//!
+//! - If ALL expressions in a projection are `MoveTowardsLeafNodes`, push the entire projection down
+//! - If NO expressions are `MoveTowardsLeafNodes`, return `Transformed::no`
+//!
+//! ### Node Classification
+//!
+//! **Barrier Nodes** (stop pushing, create projection above):
+//! - `TableScan` - the leaf, ideal extraction point
+//! - `Join` - requires routing to left/right sides
+//! - `Aggregate` - changes schema semantics
+//! - `SubqueryAlias` - scope boundary
+//! - `Union`, `Intersect`, `Except` - schema boundaries
+//!
+//! **Schema-Preserving Nodes** (push through unchanged):
+//! - `Filter` - passes all input columns through
+//! - `Sort` - passes all input columns through
+//! - `Limit` - passes all input columns through
+//!
+//! **Projection Nodes** (merge through):
+//! - Replace column refs with underlying expressions from the child projection
+
+use indexmap::{IndexMap, IndexSet};
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use datafusion_common::alias::AliasGenerator;
+use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion};
+use datafusion_common::{Column, DFSchema, Result};
+use datafusion_expr::expr_rewriter::NamePreserver;
+use datafusion_expr::logical_plan::LogicalPlan;
+use datafusion_expr::{Expr, ExpressionPlacement, Filter, Limit, Projection, Sort};
+
+use crate::optimizer::ApplyOrder;
+use crate::push_down_filter::replace_cols_by_name;
+use crate::utils::{EXTRACTED_EXPR_PREFIX, has_all_column_refs};
+use crate::{OptimizerConfig, OptimizerRule};
+
+/// Extracts `MoveTowardsLeafNodes` sub-expressions from all nodes into projections.
+///
+/// This normalizes the plan so that all `MoveTowardsLeafNodes` computations (like field
+/// accessors) live in Projection nodes, making them eligible for pushdown.
+///
+/// # Example
+///
+/// Given a filter with a struct field access:
+///
+/// ```text
+/// Filter: user['status'] = 'active'
+///   TableScan: t [user]
+/// ```
+///
+/// This rule extracts the field access into a projection:
+///
+/// ```text
+/// Filter: __datafusion_extracted_1 = 'active'
+///   Projection: user['status'] AS __datafusion_extracted_1, user
+///     TableScan: t [user]
+/// ```
+///
+/// The `OptimizeProjections` rule can then push this projection down to the scan.
+///
+/// **Important:** The `PushDownFilter` rule is aware of projections created by this rule
+/// and will not push filters through them. See `is_extracted_expr_projection` in utils.rs.
+#[derive(Default, Debug)]
+pub struct ExtractLeafExpressions {}
+
+impl ExtractLeafExpressions {
+    /// Create a new [`ExtractLeafExpressions`]
+    pub fn new() -> Self {
+        Self {}
+    }
+}
+
+impl OptimizerRule for ExtractLeafExpressions {
+    fn name(&self) -> &str {
+        "extract_leaf_expressions"
+    }
+
+    fn apply_order(&self) -> Option<ApplyOrder> {
+        Some(ApplyOrder::TopDown)
+    }
+
+    fn rewrite(
+        &self,
+        plan: LogicalPlan,
+        config: &dyn OptimizerConfig,
+    ) -> Result<Transformed<LogicalPlan>> {
+        let alias_generator = config.alias_generator();
+        extract_from_plan(plan, alias_generator)
+    }
+}
+
+/// Extracts `MoveTowardsLeafNodes` sub-expressions from a plan node.
+///
+/// With TopDown traversal, we process parent nodes first, allowing us to
+/// merge expressions through child projections.
+fn extract_from_plan(
+    plan: LogicalPlan,
+    alias_generator: &Arc<AliasGenerator>,
+) -> Result<Transformed<LogicalPlan>> {
+    match &plan {
+        // Schema-preserving nodes - extract and push down
+        LogicalPlan::Filter(_) | LogicalPlan::Sort(_) | LogicalPlan::Limit(_) => {
+            extract_from_schema_preserving(plan, alias_generator)

Review Comment:
   I’ll give it a try



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to