This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 7fa63789f8 Consolidate `EliminateNestedUnion` and `EliminateOneUnion` 
optimizer rules (#18678)
7fa63789f8 is described below

commit 7fa63789f8aa4d7df30072407b767184cb06b38a
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Nov 17 08:57:42 2025 -0500

    Consolidate `EliminateNestedUnion` and `EliminateOneUnion` optimizer rules 
(#18678)
    
    ## Which issue does this PR close?
    
    - Related to https://github.com/apache/datafusion/issues/17261
    
    
    ## Rationale for this change
    
    
    Each time a LogicalPlan is rewritten to eliminate a Union, we traverse
    the entire plan tree and copy some non-trivial parts of it.
    
    Thus, planning is faster when we make fewer passes over the plan tree.
    
    The EliminateNestedUnion and EliminateOneUnion rules both do similar
    things, and the EliminateNestedUnion rule is very simple. So let's
    combine them into a single rule that does both things in one pass over
    the plan tree.
    
    ## What changes are included in this PR?
    
    Consolidate `EliminateNestedUnion` and `EliminateOneUnion` optimizer
    rules into a single pass
    
    ## Are these changes tested?
    Yes, with existing tests.
    
    I will also run planning benchmarks.
    ## Are there any user-facing changes?
    No
    
    ---------
    
    Co-authored-by: Jeffrey Vo <[email protected]>
---
 datafusion/optimizer/src/eliminate_one_union.rs    | 121 ---------------------
 datafusion/optimizer/src/lib.rs                    |   9 +-
 ...liminate_nested_union.rs => optimize_unions.rs} |  43 ++++++--
 datafusion/optimizer/src/optimizer.rs              |   7 +-
 .../optimizer/src/propagate_empty_relation.rs      |   4 +-
 .../proto/tests/cases/roundtrip_logical_plan.rs    |   4 +-
 datafusion/sqllogictest/test_files/explain.slt     |  12 +-
 7 files changed, 53 insertions(+), 147 deletions(-)

diff --git a/datafusion/optimizer/src/eliminate_one_union.rs 
b/datafusion/optimizer/src/eliminate_one_union.rs
deleted file mode 100644
index 3e02781142..0000000000
--- a/datafusion/optimizer/src/eliminate_one_union.rs
+++ /dev/null
@@ -1,121 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! [`EliminateOneUnion`]  eliminates single element `Union`
-
-use crate::{OptimizerConfig, OptimizerRule};
-use datafusion_common::{tree_node::Transformed, Result};
-use datafusion_expr::logical_plan::{LogicalPlan, Union};
-use std::sync::Arc;
-
-use crate::optimizer::ApplyOrder;
-
-#[derive(Default, Debug)]
-/// An optimization rule that eliminates union with one element.
-pub struct EliminateOneUnion;
-
-impl EliminateOneUnion {
-    #[allow(missing_docs)]
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-
-impl OptimizerRule for EliminateOneUnion {
-    fn name(&self) -> &str {
-        "eliminate_one_union"
-    }
-
-    fn supports_rewrite(&self) -> bool {
-        true
-    }
-
-    fn rewrite(
-        &self,
-        plan: LogicalPlan,
-        _config: &dyn OptimizerConfig,
-    ) -> Result<Transformed<LogicalPlan>> {
-        match plan {
-            LogicalPlan::Union(Union { mut inputs, .. }) if inputs.len() == 1 
=> Ok(
-                Transformed::yes(Arc::unwrap_or_clone(inputs.pop().unwrap())),
-            ),
-            _ => Ok(Transformed::no(plan)),
-        }
-    }
-
-    fn apply_order(&self) -> Option<ApplyOrder> {
-        Some(ApplyOrder::TopDown)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::test::*;
-    use arrow::datatypes::{DataType, Field, Schema};
-    use datafusion_common::ToDFSchema;
-    use datafusion_expr::{
-        expr_rewriter::coerce_plan_expr_for_schema, logical_plan::table_scan,
-    };
-    use std::sync::Arc;
-
-    fn schema() -> Schema {
-        Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("key", DataType::Utf8, false),
-            Field::new("value", DataType::Int32, false),
-        ])
-    }
-
-    fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> 
Result<()> {
-        assert_optimized_plan_with_rules(
-            vec![Arc::new(EliminateOneUnion::new())],
-            plan,
-            expected,
-            true,
-        )
-    }
-
-    #[test]
-    fn eliminate_nothing() -> Result<()> {
-        let plan_builder = table_scan(Some("table"), &schema(), None)?;
-
-        let plan = plan_builder.clone().union(plan_builder.build()?)?.build()?;
-
-        let expected = "\
-        Union\
-        \n  TableScan: table\
-        \n  TableScan: table";
-        assert_optimized_plan_equal(plan, expected)
-    }
-
-    #[test]
-    fn eliminate_one_union() -> Result<()> {
-        let table_plan = coerce_plan_expr_for_schema(
-            table_scan(Some("table"), &schema(), None)?.build()?,
-            &schema().to_dfschema()?,
-        )?;
-        let schema = Arc::clone(table_plan.schema());
-        let single_union_plan = LogicalPlan::Union(Union {
-            inputs: vec![Arc::new(table_plan)],
-            schema,
-        });
-
-        let expected = "TableScan: table";
-        assert_optimized_plan_equal(single_union_plan, expected)
-    }
-}
diff --git a/datafusion/optimizer/src/lib.rs b/datafusion/optimizer/src/lib.rs
index 07ef2a46cb..7632ff858d 100644
--- a/datafusion/optimizer/src/lib.rs
+++ b/datafusion/optimizer/src/lib.rs
@@ -51,12 +51,17 @@ pub mod eliminate_filter;
 pub mod eliminate_group_by_constant;
 pub mod eliminate_join;
 pub mod eliminate_limit;
-pub mod eliminate_nested_union;
-pub mod eliminate_one_union;
+#[deprecated(since = "52.0.0", note = "Please use OptimizeUnions instead")]
+pub mod eliminate_nested_union {
+    use crate::optimize_unions::OptimizeUnions;
+    #[deprecated(since = "52.0.0", note = "Please use OptimizeUnions instead")]
+    pub type EliminateNestedUnion = OptimizeUnions;
+}
 pub mod eliminate_outer_join;
 pub mod extract_equijoin_predicate;
 pub mod filter_null_join_keys;
 pub mod optimize_projections;
+pub mod optimize_unions;
 pub mod optimizer;
 pub mod propagate_empty_relation;
 pub mod push_down_filter;
diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs 
b/datafusion/optimizer/src/optimize_unions.rs
similarity index 90%
rename from datafusion/optimizer/src/eliminate_nested_union.rs
rename to datafusion/optimizer/src/optimize_unions.rs
index f8f93727cd..cfabd512b4 100644
--- a/datafusion/optimizer/src/eliminate_nested_union.rs
+++ b/datafusion/optimizer/src/optimize_unions.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! [`EliminateNestedUnion`]: flattens nested `Union` to a single `Union`
+//! [`OptimizeUnions`]: removes `Union` nodes in the logical plan.
 use crate::optimizer::ApplyOrder;
 use crate::{OptimizerConfig, OptimizerRule};
 use datafusion_common::tree_node::Transformed;
@@ -26,19 +26,21 @@ use itertools::Itertools;
 use std::sync::Arc;
 
 #[derive(Default, Debug)]
-/// An optimization rule that replaces nested unions with a single union.
-pub struct EliminateNestedUnion;
+/// An optimization rule that
+/// 1. replaces nested unions with a single union.
+/// 2. removes unions with a single input.
+pub struct OptimizeUnions;
 
-impl EliminateNestedUnion {
+impl OptimizeUnions {
     #[allow(missing_docs)]
     pub fn new() -> Self {
         Self {}
     }
 }
 
-impl OptimizerRule for EliminateNestedUnion {
+impl OptimizerRule for OptimizeUnions {
     fn name(&self) -> &str {
-        "eliminate_nested_union"
+        "optimize_unions"
     }
 
     fn apply_order(&self) -> Option<ApplyOrder> {
@@ -55,6 +57,9 @@ impl OptimizerRule for EliminateNestedUnion {
         _config: &dyn OptimizerConfig,
     ) -> Result<Transformed<LogicalPlan>> {
         match plan {
+            LogicalPlan::Union(Union { mut inputs, .. }) if inputs.len() == 1 
=> Ok(
+                Transformed::yes(Arc::unwrap_or_clone(inputs.pop().unwrap())),
+            ),
             LogicalPlan::Union(Union { inputs, schema }) => {
                 let inputs = inputs
                     .into_iter()
@@ -139,7 +144,7 @@ mod tests {
             let analyzed_plan = 
Analyzer::with_rules(vec![Arc::new(TypeCoercion::new())])
                 .execute_and_check($plan, &options, |_, _| {})?;
             let optimizer_ctx = OptimizerContext::new().with_max_passes(1);
-            let rules: Vec<Arc<dyn crate::OptimizerRule + Send + Sync>> = 
vec![Arc::new(EliminateNestedUnion::new())];
+            let rules: Vec<Arc<dyn crate::OptimizerRule + Send + Sync>> = 
vec![Arc::new(OptimizeUnions::new())];
             assert_optimized_plan_eq_snapshot!(
                 optimizer_ctx,
                 rules,
@@ -420,4 +425,28 @@ mod tests {
               TableScan: table_1
         ")
     }
+
+    #[test]
+    fn eliminate_one_union() -> Result<()> {
+        let plan = table_scan(Some("table"), &schema(), None)?.build()?;
+        let schema = Arc::clone(plan.schema());
+        // note it is not possible to create a single input union via
+        // LogicalPlanBuilder so create it manually here
+        let plan = LogicalPlan::Union(Union {
+            inputs: vec![Arc::new(plan)],
+            schema,
+        });
+
+        // Note we can't use the same assert_optimized_plan_equal as creating a
+        // single input union is not possible via LogicalPlanBuilder and other 
passes
+        // throw errors / don't handle the schema correctly.
+        assert_optimized_plan_eq_snapshot!(
+            OptimizerContext::new().with_max_passes(1),
+            vec![Arc::new(OptimizeUnions::new())],
+            plan,
+            @r"
+        TableScan: table
+        "
+        )
+    }
 }
diff --git a/datafusion/optimizer/src/optimizer.rs 
b/datafusion/optimizer/src/optimizer.rs
index 084152d40e..421563d5e7 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -41,12 +41,11 @@ use crate::eliminate_filter::EliminateFilter;
 use crate::eliminate_group_by_constant::EliminateGroupByConstant;
 use crate::eliminate_join::EliminateJoin;
 use crate::eliminate_limit::EliminateLimit;
-use crate::eliminate_nested_union::EliminateNestedUnion;
-use crate::eliminate_one_union::EliminateOneUnion;
 use crate::eliminate_outer_join::EliminateOuterJoin;
 use crate::extract_equijoin_predicate::ExtractEquijoinPredicate;
 use crate::filter_null_join_keys::FilterNullJoinKeys;
 use crate::optimize_projections::OptimizeProjections;
+use crate::optimize_unions::OptimizeUnions;
 use crate::plan_signature::LogicalPlanSignature;
 use crate::propagate_empty_relation::PropagateEmptyRelation;
 use crate::push_down_filter::PushDownFilter;
@@ -228,7 +227,7 @@ impl Optimizer {
     /// Create a new optimizer using the recommended list of rules
     pub fn new() -> Self {
         let rules: Vec<Arc<dyn OptimizerRule + Sync + Send>> = vec![
-            Arc::new(EliminateNestedUnion::new()),
+            Arc::new(OptimizeUnions::new()),
             Arc::new(SimplifyExpressions::new()),
             Arc::new(ReplaceDistinctWithAggregate::new()),
             Arc::new(EliminateJoin::new()),
@@ -241,8 +240,6 @@ impl Optimizer {
             Arc::new(EliminateCrossJoin::new()),
             Arc::new(EliminateLimit::new()),
             Arc::new(PropagateEmptyRelation::new()),
-            // Must be after PropagateEmptyRelation
-            Arc::new(EliminateOneUnion::new()),
             Arc::new(FilterNullJoinKeys::default()),
             Arc::new(EliminateOuterJoin::new()),
             // Filters can't be pushed down past Limits, we should do 
PushDownFilter after PushDownLimit
diff --git a/datafusion/optimizer/src/propagate_empty_relation.rs 
b/datafusion/optimizer/src/propagate_empty_relation.rs
index 4db3215dfb..629b13e400 100644
--- a/datafusion/optimizer/src/propagate_empty_relation.rs
+++ b/datafusion/optimizer/src/propagate_empty_relation.rs
@@ -244,7 +244,7 @@ mod tests {
 
     use crate::assert_optimized_plan_eq_snapshot;
     use crate::eliminate_filter::EliminateFilter;
-    use crate::eliminate_nested_union::EliminateNestedUnion;
+    use crate::optimize_unions::OptimizeUnions;
     use crate::test::{
         assert_optimized_plan_with_rules, test_table_scan, 
test_table_scan_fields,
         test_table_scan_with_name,
@@ -277,7 +277,7 @@ mod tests {
         assert_optimized_plan_with_rules(
             vec![
                 Arc::new(EliminateFilter::new()),
-                Arc::new(EliminateNestedUnion::new()),
+                Arc::new(OptimizeUnions::new()),
                 Arc::new(PropagateEmptyRelation::new()),
             ],
             plan,
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs 
b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index bfd693e6a0..989589dfb8 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -29,7 +29,7 @@ use datafusion::datasource::listing::{
     ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
 };
 use datafusion::execution::options::ArrowReadOptions;
-use datafusion::optimizer::eliminate_nested_union::EliminateNestedUnion;
+use datafusion::optimizer::optimize_unions::OptimizeUnions;
 use datafusion::optimizer::Optimizer;
 use datafusion_common::parsers::CompressionTypeVariant;
 use datafusion_functions_aggregate::sum::sum_distinct;
@@ -2744,7 +2744,7 @@ async fn roundtrip_union_query() -> Result<()> {
     let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx.task_ctx())?;
     // proto deserialization only supports 2-way union, hence this plan has 
nested unions
     // apply the flatten unions optimizer rule to be able to compare
-    let optimizer = 
Optimizer::with_rules(vec![Arc::new(EliminateNestedUnion::new())]);
+    let optimizer = 
Optimizer::with_rules(vec![Arc::new(OptimizeUnions::new())]);
     let unnested = optimizer.optimize(logical_round_trip, &(ctx.state()), |_x, 
_y| {})?;
     assert_eq!(
         format!("{}", plan.display_indent_schema()),
diff --git a/datafusion/sqllogictest/test_files/explain.slt 
b/datafusion/sqllogictest/test_files/explain.slt
index d7af5ff4b9..918c01b561 100644
--- a/datafusion/sqllogictest/test_files/explain.slt
+++ b/datafusion/sqllogictest/test_files/explain.slt
@@ -176,7 +176,7 @@ initial_logical_plan
 logical_plan after resolve_grouping_function SAME TEXT AS ABOVE
 logical_plan after type_coercion SAME TEXT AS ABOVE
 analyzed_logical_plan SAME TEXT AS ABOVE
-logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
+logical_plan after optimize_unions SAME TEXT AS ABOVE
 logical_plan after simplify_expressions SAME TEXT AS ABOVE
 logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
 logical_plan after eliminate_join SAME TEXT AS ABOVE
@@ -189,7 +189,6 @@ logical_plan after eliminate_filter SAME TEXT AS ABOVE
 logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
 logical_plan after eliminate_limit SAME TEXT AS ABOVE
 logical_plan after propagate_empty_relation SAME TEXT AS ABOVE
-logical_plan after eliminate_one_union SAME TEXT AS ABOVE
 logical_plan after filter_null_join_keys SAME TEXT AS ABOVE
 logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
 logical_plan after push_down_limit SAME TEXT AS ABOVE
@@ -198,7 +197,7 @@ logical_plan after single_distinct_aggregation_to_group_by 
SAME TEXT AS ABOVE
 logical_plan after eliminate_group_by_constant SAME TEXT AS ABOVE
 logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
 logical_plan after optimize_projections TableScan: simple_explain_test 
projection=[a, b, c]
-logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
+logical_plan after optimize_unions SAME TEXT AS ABOVE
 logical_plan after simplify_expressions SAME TEXT AS ABOVE
 logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
 logical_plan after eliminate_join SAME TEXT AS ABOVE
@@ -211,7 +210,6 @@ logical_plan after eliminate_filter SAME TEXT AS ABOVE
 logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
 logical_plan after eliminate_limit SAME TEXT AS ABOVE
 logical_plan after propagate_empty_relation SAME TEXT AS ABOVE
-logical_plan after eliminate_one_union SAME TEXT AS ABOVE
 logical_plan after filter_null_join_keys SAME TEXT AS ABOVE
 logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
 logical_plan after push_down_limit SAME TEXT AS ABOVE
@@ -537,7 +535,7 @@ initial_logical_plan
 logical_plan after resolve_grouping_function SAME TEXT AS ABOVE
 logical_plan after type_coercion SAME TEXT AS ABOVE
 analyzed_logical_plan SAME TEXT AS ABOVE
-logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
+logical_plan after optimize_unions SAME TEXT AS ABOVE
 logical_plan after simplify_expressions SAME TEXT AS ABOVE
 logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
 logical_plan after eliminate_join SAME TEXT AS ABOVE
@@ -550,7 +548,6 @@ logical_plan after eliminate_filter SAME TEXT AS ABOVE
 logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
 logical_plan after eliminate_limit SAME TEXT AS ABOVE
 logical_plan after propagate_empty_relation SAME TEXT AS ABOVE
-logical_plan after eliminate_one_union SAME TEXT AS ABOVE
 logical_plan after filter_null_join_keys SAME TEXT AS ABOVE
 logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
 logical_plan after push_down_limit SAME TEXT AS ABOVE
@@ -559,7 +556,7 @@ logical_plan after single_distinct_aggregation_to_group_by 
SAME TEXT AS ABOVE
 logical_plan after eliminate_group_by_constant SAME TEXT AS ABOVE
 logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
 logical_plan after optimize_projections TableScan: simple_explain_test 
projection=[a, b, c]
-logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
+logical_plan after optimize_unions SAME TEXT AS ABOVE
 logical_plan after simplify_expressions SAME TEXT AS ABOVE
 logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
 logical_plan after eliminate_join SAME TEXT AS ABOVE
@@ -572,7 +569,6 @@ logical_plan after eliminate_filter SAME TEXT AS ABOVE
 logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
 logical_plan after eliminate_limit SAME TEXT AS ABOVE
 logical_plan after propagate_empty_relation SAME TEXT AS ABOVE
-logical_plan after eliminate_one_union SAME TEXT AS ABOVE
 logical_plan after filter_null_join_keys SAME TEXT AS ABOVE
 logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
 logical_plan after push_down_limit SAME TEXT AS ABOVE


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to