This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new f862eeff1 restore the between for simplify expression (#3661)
f862eeff1 is described below
commit f862eeff1285ad84489163f79936915a08b84d83
Author: Kun Liu <[email protected]>
AuthorDate: Fri Sep 30 19:03:57 2022 +0800
restore the between for simplify expression (#3661)
---
datafusion/core/tests/sql/predicates.rs | 1 -
datafusion/core/tests/sql/select.rs | 3 +-
datafusion/optimizer/src/simplify_expressions.rs | 63 +++++++++---------------
datafusion/optimizer/tests/integration-test.rs | 6 +--
4 files changed, 25 insertions(+), 48 deletions(-)
diff --git a/datafusion/core/tests/sql/predicates.rs
b/datafusion/core/tests/sql/predicates.rs
index 15e89f7b3..e075433ac 100644
--- a/datafusion/core/tests/sql/predicates.rs
+++ b/datafusion/core/tests/sql/predicates.rs
@@ -388,7 +388,6 @@ async fn csv_in_set_test() -> Result<()> {
#[ignore]
// https://github.com/apache/arrow-datafusion/issues/3635
async fn multiple_or_predicates() -> Result<()> {
- // TODO https://github.com/apache/arrow-datafusion/issues/3587
let ctx = SessionContext::new();
register_tpch_csv(&ctx, "lineitem").await?;
register_tpch_csv(&ctx, "part").await?;
diff --git a/datafusion/core/tests/sql/select.rs
b/datafusion/core/tests/sql/select.rs
index e6c4debdb..1d124a5dc 100644
--- a/datafusion/core/tests/sql/select.rs
+++ b/datafusion/core/tests/sql/select.rs
@@ -522,11 +522,10 @@ async fn use_between_expression_in_select_query() ->
Result<()> {
.unwrap()
.to_string();
- // TODO https://github.com/apache/arrow-datafusion/issues/3587
// Only test that the projection exprs are correct, rather than entire
output
let needle = "ProjectionExec: expr=[c1@0 >= 2 AND c1@0 <= 3 as test.c1
BETWEEN Int64(2) AND Int64(3)]";
assert_contains!(&formatted, needle);
- let needle = "Projection: #test.c1 BETWEEN Int64(2) AND Int64(3)";
+ let needle = "Projection: #test.c1 >= Int64(2) AND #test.c1 <= Int64(3)";
assert_contains!(&formatted, needle);
Ok(())
}
diff --git a/datafusion/optimizer/src/simplify_expressions.rs
b/datafusion/optimizer/src/simplify_expressions.rs
index 01b46cc92..a10f92c65 100644
--- a/datafusion/optimizer/src/simplify_expressions.rs
+++ b/datafusion/optimizer/src/simplify_expressions.rs
@@ -863,28 +863,23 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a,
S> {
//
// Rules for Between
//
- // TODO https://github.com/apache/arrow-datafusion/issues/3587
- // we remove between optimization temporarily, and will recover it
after above issue fixed.
- // We should check compatibility for `expr` `low` and `high` expr
first.
- // The rule only can work, when these three exprs can be casted to
a same data type.
// a between 3 and 5 --> a >= 3 AND a <=5
// a not between 3 and 5 --> a < 3 OR a > 5
-
- // Between {
- // expr,
- // low,
- // high,
- // negated,
- // } => {
- // if negated {
- // let l = *expr.clone();
- // let r = *expr;
- // or(l.lt(*low), r.gt(*high))
- // } else {
- // and(expr.clone().gt_eq(*low), expr.lt_eq(*high))
- // }
- // }
+ Between {
+ expr,
+ low,
+ high,
+ negated,
+ } => {
+ if negated {
+ let l = *expr.clone();
+ let r = *expr;
+ or(l.lt(*low), r.gt(*high))
+ } else {
+ and(expr.clone().gt_eq(*low), expr.lt_eq(*high))
+ }
+ }
expr => {
// no additional rewrites possible
expr
@@ -1703,8 +1698,6 @@ mod tests {
// null || false is always null
assert_eq!(simplify(lit_bool_null().or(lit(false))), lit_bool_null(),);
- // TODO change the result
- // https://github.com/apache/arrow-datafusion/issues/3587
// ( c1 BETWEEN Int32(0) AND Int32(10) ) OR Boolean(NULL)
// it can be either NULL or TRUE depending on the value of `c1
BETWEEN Int32(0) AND Int32(10)`
// and should not be rewritten
@@ -1714,15 +1707,13 @@ mod tests {
low: Box::new(lit(0)),
high: Box::new(lit(10)),
};
- let between_expr = expr.clone();
let expr = expr.or(lit_bool_null());
let result = simplify(expr);
- let expected_expr = or(between_expr, lit_bool_null());
- // let expected_expr = or(
- // and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
- // lit_bool_null(),
- //);
+ let expected_expr = or(
+ and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
+ lit_bool_null(),
+ );
assert_eq!(expected_expr, result);
}
@@ -1745,8 +1736,6 @@ mod tests {
// null && false is always false
assert_eq!(simplify(lit_bool_null().and(lit(false))), lit(false),);
- // TODO change the result
- // https://github.com/apache/arrow-datafusion/issues/3587
// c1 BETWEEN Int32(0) AND Int32(10) AND Boolean(NULL)
// it can be either NULL or FALSE depending on the value of `c1
BETWEEN Int32(0) AND Int32(10)`
// and the Boolean(NULL) should remain
@@ -1756,21 +1745,17 @@ mod tests {
low: Box::new(lit(0)),
high: Box::new(lit(10)),
};
- let between_expr = expr.clone();
let expr = expr.and(lit_bool_null());
let result = simplify(expr);
- let expected_expr = and(between_expr, lit_bool_null());
- // let expected_expr = and(
- // and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
- // lit_bool_null(),
- // );
+ let expected_expr = and(
+ and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
+ lit_bool_null(),
+ );
assert_eq!(expected_expr, result);
}
#[test]
- #[ignore]
- // https://github.com/apache/arrow-datafusion/issues/3587
fn simplify_expr_between() {
// c2 between 3 and 4 is c2 >= 3 and c2 <= 4
let expr = Expr::Between {
@@ -2360,8 +2345,6 @@ mod tests {
}
#[test]
- #[ignore]
- // https://github.com/apache/arrow-datafusion/issues/3587
fn simplify_not_between() {
let table_scan = test_table_scan();
let qual = Expr::Between {
@@ -2383,8 +2366,6 @@ mod tests {
}
#[test]
- #[ignore]
- // https://github.com/apache/arrow-datafusion/issues/3587
fn simplify_not_not_between() {
let table_scan = test_table_scan();
let qual = Expr::Between {
diff --git a/datafusion/optimizer/tests/integration-test.rs
b/datafusion/optimizer/tests/integration-test.rs
index 5f2760316..61bfafed7 100644
--- a/datafusion/optimizer/tests/integration-test.rs
+++ b/datafusion/optimizer/tests/integration-test.rs
@@ -79,13 +79,12 @@ fn intersect() -> Result<()> {
#[test]
fn between_date32_plus_interval() -> Result<()> {
- // TODO: https://github.com/apache/arrow-datafusion/issues/3587
let sql = "SELECT count(1) FROM test \
WHERE col_date32 between '1998-03-18' AND cast('1998-03-18' as date) +
INTERVAL '90 days'";
let plan = test_sql(sql)?;
let expected =
"Projection: #COUNT(UInt8(1))\n Aggregate: groupBy=[[]],
aggr=[[COUNT(UInt8(1))]]\
- \n Filter: #test.col_date32 BETWEEN Date32(\"10303\") AND
Date32(\"10393\")\
+ \n Filter: #test.col_date32 >= Date32(\"10303\") AND
#test.col_date32 <= Date32(\"10393\")\
\n TableScan: test projection=[col_date32]";
assert_eq!(expected, format!("{:?}", plan));
Ok(())
@@ -93,13 +92,12 @@ fn between_date32_plus_interval() -> Result<()> {
#[test]
fn between_date64_plus_interval() -> Result<()> {
- // TODO: https://github.com/apache/arrow-datafusion/issues/3587
let sql = "SELECT count(1) FROM test \
WHERE col_date64 between '1998-03-18T00:00:00' AND cast('1998-03-18' as
date) + INTERVAL '90 days'";
let plan = test_sql(sql)?;
let expected =
"Projection: #COUNT(UInt8(1))\n Aggregate: groupBy=[[]],
aggr=[[COUNT(UInt8(1))]]\
- \n Filter: #test.col_date64 BETWEEN Date64(\"890179200000\") AND
Date64(\"897955200000\")\
+ \n Filter: #test.col_date64 >= Date64(\"890179200000\") AND
#test.col_date64 <= Date64(\"897955200000\")\
\n TableScan: test projection=[col_date64]";
assert_eq!(expected, format!("{:?}", plan));
Ok(())