This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 2423ff0  Fix Filter / where clause without column names is removed in optimization pass (#225)
2423ff0 is described below

commit 2423ff0dd1fe9c0932c1cb8d1776efa3acd69554
Author: Daniël Heres <[email protected]>
AuthorDate: Fri Apr 30 10:11:14 2021 +0200

    Fix Filter / where clause without column names is removed in optimization pass (#225)
    
    * Workaround where without columns
    
    * Add some docs
    
    * Remove print statement
    
    * Bring back removed comment
---
 datafusion/src/optimizer/filter_push_down.rs | 34 ++++++++++++++++++++++++----
 datafusion/tests/sql.rs                      | 14 ++++++++++++
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/datafusion/src/optimizer/filter_push_down.rs b/datafusion/src/optimizer/filter_push_down.rs
index 4622e9f..356d497 100644
--- a/datafusion/src/optimizer/filter_push_down.rs
+++ b/datafusion/src/optimizer/filter_push_down.rs
@@ -237,17 +237,30 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> {
             let mut predicates = vec![];
             split_members(predicate, &mut predicates);
 
+            // Predicates without referencing columns (WHERE FALSE, WHERE 1=1, etc.)
+            let mut no_col_predicates = vec![];
+
             predicates
                 .into_iter()
                 .try_for_each::<_, Result<()>>(|predicate| {
                     let mut columns: HashSet<String> = HashSet::new();
                     utils::expr_to_column_names(predicate, &mut columns)?;
-                    // collect the predicate
-                    state.filters.push((predicate.clone(), columns));
+                    if columns.is_empty() {
+                        no_col_predicates.push(predicate)
+                    } else {
+                        // collect the predicate
+                        state.filters.push((predicate.clone(), columns));
+                    }
                     Ok(())
                 })?;
-
-            optimize(input, state)
+            // Predicates without columns will not be pushed down.
+            // As those contain only literals, they could be optimized using constant folding
+            // and removal of WHERE TRUE / WHERE FALSE
+            if !no_col_predicates.is_empty() {
+                Ok(add_filter(optimize(input, state)?, &no_col_predicates))
+            } else {
+                optimize(input, state)
+            }
         }
         LogicalPlan::Projection {
             input,
@@ -483,6 +496,19 @@ mod tests {
     }
 
     #[test]
+    fn filter_no_columns() -> Result<()> {
+        let table_scan = test_table_scan()?;
+        let plan = LogicalPlanBuilder::from(&table_scan)
+            .filter(lit(0i64).eq(lit(1i64)))?
+            .build()?;
+        let expected = "\
+            Filter: Int64(0) Eq Int64(1)\
+            \n  TableScan: test projection=None";
+        assert_optimized_plan_eq(&plan, expected);
+        Ok(())
+    }
+
+    #[test]
     fn filter_jump_2_plans() -> Result<()> {
         let table_scan = test_table_scan()?;
         let plan = LogicalPlanBuilder::from(&table_scan)
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 79baeae..7169294 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -475,6 +475,20 @@ async fn csv_query_group_by_and_having_and_where() -> Result<()> {
 }
 
 #[tokio::test]
+async fn all_where_empty() -> Result<()> {
+    let mut ctx = ExecutionContext::new();
+    register_aggregate_csv(&mut ctx)?;
+    let sql = "SELECT *
+               FROM aggregate_test_100
+               WHERE 1=2";
+    let mut actual = execute(&mut ctx, sql).await;
+    actual.sort();
+    let expected: Vec<Vec<String>> = vec![];
+    assert_eq!(expected, actual);
+    Ok(())
+}
+
+#[tokio::test]
 async fn csv_query_having_without_group_by() -> Result<()> {
     let mut ctx = ExecutionContext::new();
     register_aggregate_csv(&mut ctx)?;

Reply via email to