This is an automated email from the ASF dual-hosted git repository.
wjones127 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 393e48f988 feat: support arbitrary binaryexpr simplifications (#8256)
393e48f988 is described below
commit 393e48f98872c696a90fce033fa584533d2326fa
Author: Will Jones <[email protected]>
AuthorDate: Sat Nov 18 08:30:05 2023 -0800
feat: support arbitrary binaryexpr simplifications (#8256)
---
.../src/simplify_expressions/guarantees.rs | 73 +++++++++++++---------
1 file changed, 44 insertions(+), 29 deletions(-)
diff --git a/datafusion/optimizer/src/simplify_expressions/guarantees.rs
b/datafusion/optimizer/src/simplify_expressions/guarantees.rs
index 5504d7d76e..0204698571 100644
--- a/datafusion/optimizer/src/simplify_expressions/guarantees.rs
+++ b/datafusion/optimizer/src/simplify_expressions/guarantees.rs
@@ -20,7 +20,7 @@
//! [`ExprSimplifier::with_guarantees()`]:
crate::simplify_expressions::expr_simplifier::ExprSimplifier::with_guarantees
use datafusion_common::{tree_node::TreeNodeRewriter, DataFusionError, Result};
use datafusion_expr::{expr::InList, lit, Between, BinaryExpr, Expr};
-use std::collections::HashMap;
+use std::{borrow::Cow, collections::HashMap};
use datafusion_physical_expr::intervals::{Interval, IntervalBound,
NullableInterval};
@@ -103,37 +103,44 @@ impl<'a> TreeNodeRewriter for GuaranteeRewriter<'a> {
}
Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
- // We only support comparisons for now
- if !op.is_comparison_operator() {
- return Ok(expr);
- };
-
- // Check if this is a comparison between a column and literal
- let (col, op, value) = match (left.as_ref(), right.as_ref()) {
- (Expr::Column(_), Expr::Literal(value)) => (left, *op,
value),
- (Expr::Literal(value), Expr::Column(_)) => {
- // If we can swap the op, we can simplify the
expression
- if let Some(op) = op.swap() {
- (right, op, value)
+ // The left or right side of expression might either have a
guarantee
+ // or be a literal. Either way, we can resolve them to a
NullableInterval.
+ let left_interval = self
+ .guarantees
+ .get(left.as_ref())
+ .map(|interval| Cow::Borrowed(*interval))
+ .or_else(|| {
+ if let Expr::Literal(value) = left.as_ref() {
+ Some(Cow::Owned(value.clone().into()))
} else {
- return Ok(expr);
+ None
+ }
+ });
+ let right_interval = self
+ .guarantees
+ .get(right.as_ref())
+ .map(|interval| Cow::Borrowed(*interval))
+ .or_else(|| {
+ if let Expr::Literal(value) = right.as_ref() {
+ Some(Cow::Owned(value.clone().into()))
+ } else {
+ None
+ }
+ });
+
+ match (left_interval, right_interval) {
+ (Some(left_interval), Some(right_interval)) => {
+ let result =
+ left_interval.apply_operator(op,
right_interval.as_ref())?;
+ if result.is_certainly_true() {
+ Ok(lit(true))
+ } else if result.is_certainly_false() {
+ Ok(lit(false))
+ } else {
+ Ok(expr)
}
}
- _ => return Ok(expr),
- };
-
- if let Some(col_interval) = self.guarantees.get(col.as_ref()) {
- let result =
- col_interval.apply_operator(&op,
&value.clone().into())?;
- if result.is_certainly_true() {
- Ok(lit(true))
- } else if result.is_certainly_false() {
- Ok(lit(false))
- } else {
- Ok(expr)
- }
- } else {
- Ok(expr)
+ _ => Ok(expr),
}
}
@@ -262,6 +269,13 @@ mod tests {
values: Interval::make(Some(1_i32), Some(3_i32), (true,
false)),
},
),
+ // s.y ∈ (1, 3] (not null)
+ (
+ col("s").field("y"),
+ NullableInterval::NotNull {
+ values: Interval::make(Some(1_i32), Some(3_i32), (true,
false)),
+ },
+ ),
];
let mut rewriter = GuaranteeRewriter::new(guarantees.iter());
@@ -269,6 +283,7 @@ mod tests {
// (original_expr, expected_simplification)
let simplified_cases = &[
(col("x").lt_eq(lit(1)), false),
+ (col("s").field("y").lt_eq(lit(1)), false),
(col("x").lt_eq(lit(3)), true),
(col("x").gt(lit(3)), false),
(col("x").gt(lit(1)), true),