This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new b5e034b Remove unnecessary projection in logical plan optimization phase (#747)
b5e034b is described below
commit b5e034b1a4f47a47de68d176b98042a1e4df7d58
Author: Ruihang Xia <[email protected]>
AuthorDate: Tue Jul 20 00:29:27 2021 +0800
Remove unnecessary projection in logical plan optimization phase (#747)
* eliminate super-set project
Signed-off-by: Ruihang Xia <[email protected]>
* keep projection right before table scan
Signed-off-by: Ruihang Xia <[email protected]>
* tidy
Signed-off-by: Ruihang Xia <[email protected]>
---
datafusion/src/optimizer/projection_push_down.rs | 69 ++++++++++++++++++++++--
1 file changed, 66 insertions(+), 3 deletions(-)
diff --git a/datafusion/src/optimizer/projection_push_down.rs b/datafusion/src/optimizer/projection_push_down.rs
index 0272b9f..089dca2 100644
--- a/datafusion/src/optimizer/projection_push_down.rs
+++ b/datafusion/src/optimizer/projection_push_down.rs
@@ -173,7 +173,17 @@ fn optimize_plan(
true,
execution_props,
)?;
- if new_fields.is_empty() {
+
+ let new_required_columns_optimized = new_input
+ .schema()
+ .fields()
+ .iter()
+ .map(|f| f.qualified_column())
+ .collect::<HashSet<Column>>();
+
+ if new_fields.is_empty()
+ || (has_projection && &new_required_columns_optimized == required_columns)
+ {
// no need for an expression at all
Ok(new_input)
} else {
@@ -497,6 +507,60 @@ mod tests {
}
#[test]
+ fn redundunt_project() -> Result<()> {
+ let table_scan = test_table_scan()?;
+
+ let plan = LogicalPlanBuilder::from(table_scan)
+ .project(vec![col("a"), col("b"), col("c")])?
+ .project(vec![col("a"), col("c"), col("b")])?
+ .build()?;
+ let expected = "Projection: #test.a, #test.c, #test.b\
+ \n TableScan: test projection=Some([0, 1, 2])";
+
+ assert_optimized_plan_eq(&plan, expected);
+
+ Ok(())
+ }
+
+ #[test]
+ fn reorder_projection() -> Result<()> {
+ let table_scan = test_table_scan()?;
+
+ let plan = LogicalPlanBuilder::from(table_scan)
+ .project(vec![col("c"), col("b"), col("a")])?
+ .build()?;
+ let expected = "Projection: #test.c, #test.b, #test.a\
+ \n TableScan: test projection=Some([0, 1, 2])";
+
+ assert_optimized_plan_eq(&plan, expected);
+
+ Ok(())
+ }
+
+ #[test]
+ fn noncontiguous_redundunt_projection() -> Result<()> {
+ let table_scan = test_table_scan()?;
+
+ let plan = LogicalPlanBuilder::from(table_scan)
+ .project(vec![col("c"), col("b"), col("a")])?
+ .filter(col("c").gt(lit(1)))?
+ .project(vec![col("c"), col("a"), col("b")])?
+ .filter(col("b").gt(lit(1)))?
+ .filter(col("a").gt(lit(1)))?
+ .project(vec![col("a"), col("c"), col("b")])?
+ .build()?;
+ let expected = "Projection: #test.a, #test.c, #test.b\
+ \n Filter: #test.a Gt Int32(1)\
+ \n Filter: #test.b Gt Int32(1)\
+ \n Filter: #test.c Gt Int32(1)\
+ \n TableScan: test projection=Some([0, 1, 2])";
+
+ assert_optimized_plan_eq(&plan, expected);
+
+ Ok(())
+ }
+
+ #[test]
fn join_schema_trim_full_join_column_projection() -> Result<()> {
let table_scan = test_table_scan()?;
@@ -812,8 +876,7 @@ mod tests {
assert_fields_eq(&plan, vec!["c", "a", "MAX(test.b)"]);
- let expected = "\
- Projection: #test.c, #test.a, #MAX(test.b)\
+ let expected = "Projection: #test.c, #test.a, #MAX(test.b)\
\n Filter: #test.c Gt Int32(1)\
\n Aggregate: groupBy=[[#test.a, #test.c]], aggr=[[MAX(#test.b)]]\
\n TableScan: test projection=Some([0, 1, 2])";