This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new e2e735430e [Minor] Remove/reorder logical plan rules (#15421)
e2e735430e is described below
commit e2e735430ed190ce4a14f944c734b49bffee79ca
Author: Daniël Heres <[email protected]>
AuthorDate: Wed Mar 26 02:06:55 2025 +0100
[Minor] Remove/reorder logical plan rules (#15421)
* Remove rules that execute twice
* Move optimization rule around
---
datafusion/optimizer/src/optimizer.rs | 4 +---
datafusion/optimizer/tests/optimizer_integration.rs | 4 ++--
datafusion/sqllogictest/test_files/explain.slt | 8 ++------
datafusion/sqllogictest/test_files/group_by.slt | 10 ++++------
4 files changed, 9 insertions(+), 17 deletions(-)
diff --git a/datafusion/optimizer/src/optimizer.rs
b/datafusion/optimizer/src/optimizer.rs
index 04d73fe3ab..ffbb95cb7f 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -230,7 +230,6 @@ impl Optimizer {
Arc::new(EliminateDuplicatedExpr::new()),
Arc::new(EliminateFilter::new()),
Arc::new(EliminateCrossJoin::new()),
- Arc::new(CommonSubexprEliminate::new()),
Arc::new(EliminateLimit::new()),
Arc::new(PropagateEmptyRelation::new()),
// Must be after PropagateEmptyRelation
@@ -243,9 +242,8 @@ impl Optimizer {
Arc::new(SingleDistinctToGroupBy::new()),
// The previous optimizations added expressions and projections,
// that might benefit from the following rules
- Arc::new(SimplifyExpressions::new()),
- Arc::new(CommonSubexprEliminate::new()),
Arc::new(EliminateGroupByConstant::new()),
+ Arc::new(CommonSubexprEliminate::new()),
Arc::new(OptimizeProjections::new()),
];
diff --git a/datafusion/optimizer/tests/optimizer_integration.rs
b/datafusion/optimizer/tests/optimizer_integration.rs
index 13d6b8de79..098027dd06 100644
--- a/datafusion/optimizer/tests/optimizer_integration.rs
+++ b/datafusion/optimizer/tests/optimizer_integration.rs
@@ -78,8 +78,8 @@ fn subquery_filter_with_cast() -> Result<()> {
\n SubqueryAlias: __scalar_sq_1\
\n Aggregate: groupBy=[[]], aggr=[[avg(CAST(test.col_int32 AS
Float64))]]\
\n Projection: test.col_int32\
- \n Filter: __common_expr_5 >= Date32(\"2002-05-08\") AND
__common_expr_5 <= Date32(\"2002-05-13\")\
- \n Projection: CAST(test.col_utf8 AS Date32) AS
__common_expr_5, test.col_int32\
+ \n Filter: __common_expr_4 >= Date32(\"2002-05-08\") AND
__common_expr_4 <= Date32(\"2002-05-13\")\
+ \n Projection: CAST(test.col_utf8 AS Date32) AS
__common_expr_4, test.col_int32\
\n TableScan: test projection=[col_int32, col_utf8]";
assert_eq!(expected, format!("{plan}"));
Ok(())
diff --git a/datafusion/sqllogictest/test_files/explain.slt
b/datafusion/sqllogictest/test_files/explain.slt
index 2e27ebe927..deff793e51 100644
--- a/datafusion/sqllogictest/test_files/explain.slt
+++ b/datafusion/sqllogictest/test_files/explain.slt
@@ -187,7 +187,6 @@ logical_plan after extract_equijoin_predicate SAME TEXT AS
ABOVE
logical_plan after eliminate_duplicated_expr SAME TEXT AS ABOVE
logical_plan after eliminate_filter SAME TEXT AS ABOVE
logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
-logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
logical_plan after eliminate_limit SAME TEXT AS ABOVE
logical_plan after propagate_empty_relation SAME TEXT AS ABOVE
logical_plan after eliminate_one_union SAME TEXT AS ABOVE
@@ -196,9 +195,8 @@ logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
logical_plan after push_down_limit SAME TEXT AS ABOVE
logical_plan after push_down_filter SAME TEXT AS ABOVE
logical_plan after single_distinct_aggregation_to_group_by SAME TEXT AS ABOVE
-logical_plan after simplify_expressions SAME TEXT AS ABOVE
-logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
logical_plan after eliminate_group_by_constant SAME TEXT AS ABOVE
+logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
logical_plan after optimize_projections TableScan: simple_explain_test
projection=[a, b, c]
logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
@@ -210,7 +208,6 @@ logical_plan after extract_equijoin_predicate SAME TEXT AS
ABOVE
logical_plan after eliminate_duplicated_expr SAME TEXT AS ABOVE
logical_plan after eliminate_filter SAME TEXT AS ABOVE
logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
-logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
logical_plan after eliminate_limit SAME TEXT AS ABOVE
logical_plan after propagate_empty_relation SAME TEXT AS ABOVE
logical_plan after eliminate_one_union SAME TEXT AS ABOVE
@@ -219,9 +216,8 @@ logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
logical_plan after push_down_limit SAME TEXT AS ABOVE
logical_plan after push_down_filter SAME TEXT AS ABOVE
logical_plan after single_distinct_aggregation_to_group_by SAME TEXT AS ABOVE
-logical_plan after simplify_expressions SAME TEXT AS ABOVE
-logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
logical_plan after eliminate_group_by_constant SAME TEXT AS ABOVE
+logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
logical_plan after optimize_projections SAME TEXT AS ABOVE
logical_plan TableScan: simple_explain_test projection=[a, b, c]
initial_physical_plan DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b,
c], file_type=csv, has_header=true
diff --git a/datafusion/sqllogictest/test_files/group_by.slt
b/datafusion/sqllogictest/test_files/group_by.slt
index 75baba3efc..5bf539e0b0 100644
--- a/datafusion/sqllogictest/test_files/group_by.slt
+++ b/datafusion/sqllogictest/test_files/group_by.slt
@@ -4188,9 +4188,8 @@ EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)),
MAX(DISTINCT CAST(x AS DOUBLE))
logical_plan
01)Projection: sum(alias1) AS sum(DISTINCT t1.x), max(alias1) AS max(DISTINCT
t1.x)
02)--Aggregate: groupBy=[[t1.y]], aggr=[[sum(alias1), max(alias1)]]
-03)----Aggregate: groupBy=[[t1.y, __common_expr_1 AS t1.x AS alias1]],
aggr=[[]]
-04)------Projection: CAST(t1.x AS Float64) AS __common_expr_1, t1.y
-05)--------TableScan: t1 projection=[x, y]
+03)----Aggregate: groupBy=[[t1.y, CAST(t1.x AS Float64) AS alias1]], aggr=[[]]
+04)------TableScan: t1 projection=[x, y]
physical_plan
01)ProjectionExec: expr=[sum(alias1)@1 as sum(DISTINCT t1.x), max(alias1)@2 as
max(DISTINCT t1.x)]
02)--AggregateExec: mode=FinalPartitioned, gby=[y@0 as y], aggr=[sum(alias1),
max(alias1)]
@@ -4201,9 +4200,8 @@ physical_plan
07)------------CoalesceBatchesExec: target_batch_size=2
08)--------------RepartitionExec: partitioning=Hash([y@0, alias1@1], 8),
input_partitions=8
09)----------------RepartitionExec: partitioning=RoundRobinBatch(8),
input_partitions=1
-10)------------------AggregateExec: mode=Partial, gby=[y@1 as y,
__common_expr_1@0 as alias1], aggr=[]
-11)--------------------ProjectionExec: expr=[CAST(x@0 AS Float64) as
__common_expr_1, y@1 as y]
-12)----------------------DataSourceExec: partitions=1, partition_sizes=[1]
+10)------------------AggregateExec: mode=Partial, gby=[y@1 as y, CAST(x@0 AS
Float64) as alias1], aggr=[]
+11)--------------------DataSourceExec: partitions=1, partition_sizes=[1]
# create an unbounded table that contains ordered timestamp.
statement ok
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]