This is an automated email from the ASF dual-hosted git repository.
jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 2423ff0 Fix Filter / where clause without column names is removed in
optimization pass (#225)
2423ff0 is described below
commit 2423ff0dd1fe9c0932c1cb8d1776efa3acd69554
Author: Daniël Heres <[email protected]>
AuthorDate: Fri Apr 30 10:11:14 2021 +0200
Fix Filter / where clause without column names is removed in optimization
pass (#225)
* Workaround where without columns
* Add some docs
* Remove print statement
* Bring back removed comment
---
datafusion/src/optimizer/filter_push_down.rs | 34 ++++++++++++++++++++++++----
datafusion/tests/sql.rs | 14 ++++++++++++
2 files changed, 44 insertions(+), 4 deletions(-)
diff --git a/datafusion/src/optimizer/filter_push_down.rs b/datafusion/src/optimizer/filter_push_down.rs
index 4622e9f..356d497 100644
--- a/datafusion/src/optimizer/filter_push_down.rs
+++ b/datafusion/src/optimizer/filter_push_down.rs
@@ -237,17 +237,30 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> {
let mut predicates = vec![];
split_members(predicate, &mut predicates);
+ // Predicates without referencing columns (WHERE FALSE, WHERE 1=1, etc.)
+ let mut no_col_predicates = vec![];
+
predicates
.into_iter()
.try_for_each::<_, Result<()>>(|predicate| {
let mut columns: HashSet<String> = HashSet::new();
utils::expr_to_column_names(predicate, &mut columns)?;
- // collect the predicate
- state.filters.push((predicate.clone(), columns));
+ if columns.is_empty() {
+ no_col_predicates.push(predicate)
+ } else {
+ // collect the predicate
+ state.filters.push((predicate.clone(), columns));
+ }
Ok(())
})?;
-
- optimize(input, state)
+ // Predicates without columns will not be pushed down.
+ // As those contain only literals, they could be optimized using constant folding
+ // and removal of WHERE TRUE / WHERE FALSE
+ if !no_col_predicates.is_empty() {
+ Ok(add_filter(optimize(input, state)?, &no_col_predicates))
+ } else {
+ optimize(input, state)
+ }
}
LogicalPlan::Projection {
input,
@@ -483,6 +496,19 @@ mod tests {
}
#[test]
+ fn filter_no_columns() -> Result<()> {
+ let table_scan = test_table_scan()?;
+ let plan = LogicalPlanBuilder::from(&table_scan)
+ .filter(lit(0i64).eq(lit(1i64)))?
+ .build()?;
+ let expected = "\
+ Filter: Int64(0) Eq Int64(1)\
+ \n TableScan: test projection=None";
+ assert_optimized_plan_eq(&plan, expected);
+ Ok(())
+ }
+
+ #[test]
fn filter_jump_2_plans() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 79baeae..7169294 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -475,6 +475,20 @@ async fn csv_query_group_by_and_having_and_where() -> Result<()> {
}
#[tokio::test]
+async fn all_where_empty() -> Result<()> {
+ let mut ctx = ExecutionContext::new();
+ register_aggregate_csv(&mut ctx)?;
+ let sql = "SELECT *
+ FROM aggregate_test_100
+ WHERE 1=2";
+ let mut actual = execute(&mut ctx, sql).await;
+ actual.sort();
+ let expected: Vec<Vec<String>> = vec![];
+ assert_eq!(expected, actual);
+ Ok(())
+}
+
+#[tokio::test]
async fn csv_query_having_without_group_by() -> Result<()> {
let mut ctx = ExecutionContext::new();
register_aggregate_csv(&mut ctx)?;