This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new f862eeff1 restore the between for simplify expression (#3661)
f862eeff1 is described below

commit f862eeff1285ad84489163f79936915a08b84d83
Author: Kun Liu <[email protected]>
AuthorDate: Fri Sep 30 19:03:57 2022 +0800

    restore the between for simplify expression (#3661)
---
 datafusion/core/tests/sql/predicates.rs          |  1 -
 datafusion/core/tests/sql/select.rs              |  3 +-
 datafusion/optimizer/src/simplify_expressions.rs | 63 +++++++++---------------
 datafusion/optimizer/tests/integration-test.rs   |  6 +--
 4 files changed, 25 insertions(+), 48 deletions(-)

diff --git a/datafusion/core/tests/sql/predicates.rs 
b/datafusion/core/tests/sql/predicates.rs
index 15e89f7b3..e075433ac 100644
--- a/datafusion/core/tests/sql/predicates.rs
+++ b/datafusion/core/tests/sql/predicates.rs
@@ -388,7 +388,6 @@ async fn csv_in_set_test() -> Result<()> {
 #[ignore]
 // https://github.com/apache/arrow-datafusion/issues/3635
 async fn multiple_or_predicates() -> Result<()> {
-    // TODO https://github.com/apache/arrow-datafusion/issues/3587
     let ctx = SessionContext::new();
     register_tpch_csv(&ctx, "lineitem").await?;
     register_tpch_csv(&ctx, "part").await?;
diff --git a/datafusion/core/tests/sql/select.rs 
b/datafusion/core/tests/sql/select.rs
index e6c4debdb..1d124a5dc 100644
--- a/datafusion/core/tests/sql/select.rs
+++ b/datafusion/core/tests/sql/select.rs
@@ -522,11 +522,10 @@ async fn use_between_expression_in_select_query() -> 
Result<()> {
         .unwrap()
         .to_string();
 
-    // TODO https://github.com/apache/arrow-datafusion/issues/3587
     // Only test that the projection exprs are correct, rather than entire 
output
     let needle = "ProjectionExec: expr=[c1@0 >= 2 AND c1@0 <= 3 as test.c1 
BETWEEN Int64(2) AND Int64(3)]";
     assert_contains!(&formatted, needle);
-    let needle = "Projection: #test.c1 BETWEEN Int64(2) AND Int64(3)";
+    let needle = "Projection: #test.c1 >= Int64(2) AND #test.c1 <= Int64(3)";
     assert_contains!(&formatted, needle);
     Ok(())
 }
diff --git a/datafusion/optimizer/src/simplify_expressions.rs 
b/datafusion/optimizer/src/simplify_expressions.rs
index 01b46cc92..a10f92c65 100644
--- a/datafusion/optimizer/src/simplify_expressions.rs
+++ b/datafusion/optimizer/src/simplify_expressions.rs
@@ -863,28 +863,23 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, 
S> {
             //
             // Rules for Between
             //
-            // TODO https://github.com/apache/arrow-datafusion/issues/3587
-            // we remove between optimization temporarily, and will recover it 
after above issue fixed.
-            // We should check compatibility for `expr` `low` and `high` expr 
first.
-            // The rule only can work, when these three exprs can be casted to 
a same data type.
 
             // a between 3 and 5  -->  a >= 3 AND a <=5
             // a not between 3 and 5  -->  a < 3 OR a > 5
-
-            // Between {
-            //     expr,
-            //     low,
-            //     high,
-            //     negated,
-            // } => {
-            //     if negated {
-            //         let l = *expr.clone();
-            //         let r = *expr;
-            //         or(l.lt(*low), r.gt(*high))
-            //     } else {
-            //         and(expr.clone().gt_eq(*low), expr.lt_eq(*high))
-            //     }
-            // }
+            Between {
+                expr,
+                low,
+                high,
+                negated,
+            } => {
+                if negated {
+                    let l = *expr.clone();
+                    let r = *expr;
+                    or(l.lt(*low), r.gt(*high))
+                } else {
+                    and(expr.clone().gt_eq(*low), expr.lt_eq(*high))
+                }
+            }
             expr => {
                 // no additional rewrites possible
                 expr
@@ -1703,8 +1698,6 @@ mod tests {
         // null || false is always null
         assert_eq!(simplify(lit_bool_null().or(lit(false))), lit_bool_null(),);
 
-        // TODO change the result
-        // https://github.com/apache/arrow-datafusion/issues/3587
         // ( c1 BETWEEN Int32(0) AND Int32(10) ) OR Boolean(NULL)
         // it can be either NULL or  TRUE depending on the value of `c1 
BETWEEN Int32(0) AND Int32(10)`
         // and should not be rewritten
@@ -1714,15 +1707,13 @@ mod tests {
             low: Box::new(lit(0)),
             high: Box::new(lit(10)),
         };
-        let between_expr = expr.clone();
         let expr = expr.or(lit_bool_null());
         let result = simplify(expr);
 
-        let expected_expr = or(between_expr, lit_bool_null());
-        // let expected_expr = or(
-        //    and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
-        //    lit_bool_null(),
-        //);
+        let expected_expr = or(
+            and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
+            lit_bool_null(),
+        );
         assert_eq!(expected_expr, result);
     }
 
@@ -1745,8 +1736,6 @@ mod tests {
         // null && false is always false
         assert_eq!(simplify(lit_bool_null().and(lit(false))), lit(false),);
 
-        // TODO change the result
-        // https://github.com/apache/arrow-datafusion/issues/3587
         // c1 BETWEEN Int32(0) AND Int32(10) AND Boolean(NULL)
         // it can be either NULL or FALSE depending on the value of `c1 
BETWEEN Int32(0) AND Int32(10)`
         // and the Boolean(NULL) should remain
@@ -1756,21 +1745,17 @@ mod tests {
             low: Box::new(lit(0)),
             high: Box::new(lit(10)),
         };
-        let between_expr = expr.clone();
         let expr = expr.and(lit_bool_null());
         let result = simplify(expr);
 
-        let expected_expr = and(between_expr, lit_bool_null());
-        // let expected_expr = and(
-        //    and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
-        //    lit_bool_null(),
-        // );
+        let expected_expr = and(
+            and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
+            lit_bool_null(),
+        );
         assert_eq!(expected_expr, result);
     }
 
     #[test]
-    #[ignore]
-    // https://github.com/apache/arrow-datafusion/issues/3587
     fn simplify_expr_between() {
         // c2 between 3 and 4 is c2 >= 3 and c2 <= 4
         let expr = Expr::Between {
@@ -2360,8 +2345,6 @@ mod tests {
     }
 
     #[test]
-    #[ignore]
-    // https://github.com/apache/arrow-datafusion/issues/3587
     fn simplify_not_between() {
         let table_scan = test_table_scan();
         let qual = Expr::Between {
@@ -2383,8 +2366,6 @@ mod tests {
     }
 
     #[test]
-    #[ignore]
-    // https://github.com/apache/arrow-datafusion/issues/3587
     fn simplify_not_not_between() {
         let table_scan = test_table_scan();
         let qual = Expr::Between {
diff --git a/datafusion/optimizer/tests/integration-test.rs 
b/datafusion/optimizer/tests/integration-test.rs
index 5f2760316..61bfafed7 100644
--- a/datafusion/optimizer/tests/integration-test.rs
+++ b/datafusion/optimizer/tests/integration-test.rs
@@ -79,13 +79,12 @@ fn intersect() -> Result<()> {
 
 #[test]
 fn between_date32_plus_interval() -> Result<()> {
-    // TODO: https://github.com/apache/arrow-datafusion/issues/3587
     let sql = "SELECT count(1) FROM test \
     WHERE col_date32 between '1998-03-18' AND cast('1998-03-18' as date) + 
INTERVAL '90 days'";
     let plan = test_sql(sql)?;
     let expected =
         "Projection: #COUNT(UInt8(1))\n  Aggregate: groupBy=[[]], 
aggr=[[COUNT(UInt8(1))]]\
-        \n    Filter: #test.col_date32 BETWEEN Date32(\"10303\") AND 
Date32(\"10393\")\
+        \n    Filter: #test.col_date32 >= Date32(\"10303\") AND 
#test.col_date32 <= Date32(\"10393\")\
         \n      TableScan: test projection=[col_date32]";
     assert_eq!(expected, format!("{:?}", plan));
     Ok(())
@@ -93,13 +92,12 @@ fn between_date32_plus_interval() -> Result<()> {
 
 #[test]
 fn between_date64_plus_interval() -> Result<()> {
-    // TODO: https://github.com/apache/arrow-datafusion/issues/3587
     let sql = "SELECT count(1) FROM test \
     WHERE col_date64 between '1998-03-18T00:00:00' AND cast('1998-03-18' as 
date) + INTERVAL '90 days'";
     let plan = test_sql(sql)?;
     let expected =
         "Projection: #COUNT(UInt8(1))\n  Aggregate: groupBy=[[]], 
aggr=[[COUNT(UInt8(1))]]\
-        \n    Filter: #test.col_date64 BETWEEN Date64(\"890179200000\") AND 
Date64(\"897955200000\")\
+        \n    Filter: #test.col_date64 >= Date64(\"890179200000\") AND 
#test.col_date64 <= Date64(\"897955200000\")\
         \n      TableScan: test projection=[col_date64]";
     assert_eq!(expected, format!("{:?}", plan));
     Ok(())

Reply via email to