This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new a32c40894 `Expr` variants for boolean operations (#3275)
a32c40894 is described below
commit a32c408940e6e611f70828829cf97667f7bdd2c9
Author: Sarah Yurick <[email protected]>
AuthorDate: Tue Aug 30 13:55:40 2022 -0700
`Expr` variants for boolean operations (#3275)
* add boolean operators as Expr
* minor not unk changes
* Update to_proto.rs
* protobuf support
Co-authored-by: Andy Grove <[email protected]>
---
datafusion/core/src/datasource/listing/helpers.rs | 6 +++
datafusion/core/src/physical_plan/planner.rs | 24 +++++++++++
datafusion/expr/src/expr.rs | 48 ++++++++++++++++++++++
datafusion/expr/src/expr_rewriter.rs | 8 ++++
datafusion/expr/src/expr_schema.rs | 18 +++++++-
datafusion/expr/src/expr_visitor.rs | 6 +++
datafusion/expr/src/utils.rs | 6 +++
.../optimizer/src/common_subexpr_eliminate.rs | 18 ++++++++
datafusion/optimizer/src/simplify_expressions.rs | 6 +++
datafusion/proto/proto/datafusion.proto | 31 ++++++++++++++
datafusion/proto/src/from_proto.rs | 18 ++++++++
datafusion/proto/src/to_proto.rs | 48 ++++++++++++++++++++++
datafusion/sql/src/utils.rs | 18 ++++++++
13 files changed, 253 insertions(+), 2 deletions(-)
diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs
index 6c018eda3..023d319d7 100644
--- a/datafusion/core/src/datasource/listing/helpers.rs
+++ b/datafusion/core/src/datasource/listing/helpers.rs
@@ -84,6 +84,12 @@ impl ExpressionVisitor for ApplicabilityVisitor<'_> {
| Expr::Not(_)
| Expr::IsNotNull(_)
| Expr::IsNull(_)
+ | Expr::IsTrue(_)
+ | Expr::IsFalse(_)
+ | Expr::IsUnknown(_)
+ | Expr::IsNotTrue(_)
+ | Expr::IsNotFalse(_)
+ | Expr::IsNotUnknown(_)
| Expr::Negative(_)
| Expr::Cast { .. }
| Expr::TryCast { .. }
diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs
index 658da6a0f..747cd1a20 100644
--- a/datafusion/core/src/physical_plan/planner.rs
+++ b/datafusion/core/src/physical_plan/planner.rs
@@ -152,6 +152,30 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result<String> {
let expr = create_physical_name(expr, false)?;
Ok(format!("{} IS NOT NULL", expr))
}
+ Expr::IsTrue(expr) => {
+ let expr = create_physical_name(expr, false)?;
+ Ok(format!("{} IS TRUE", expr))
+ }
+ Expr::IsFalse(expr) => {
+ let expr = create_physical_name(expr, false)?;
+ Ok(format!("{} IS FALSE", expr))
+ }
+ Expr::IsUnknown(expr) => {
+ let expr = create_physical_name(expr, false)?;
+ Ok(format!("{} IS UNKNOWN", expr))
+ }
+ Expr::IsNotTrue(expr) => {
+ let expr = create_physical_name(expr, false)?;
+ Ok(format!("{} IS NOT TRUE", expr))
+ }
+ Expr::IsNotFalse(expr) => {
+ let expr = create_physical_name(expr, false)?;
+ Ok(format!("{} IS NOT FALSE", expr))
+ }
+ Expr::IsNotUnknown(expr) => {
+ let expr = create_physical_name(expr, false)?;
+ Ok(format!("{} IS NOT UNKNOWN", expr))
+ }
Expr::GetIndexedField { expr, key } => {
let expr = create_physical_name(expr, false)?;
Ok(format!("{}[{}]", expr, key))
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index ba6f7a96c..f52573b2e 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -106,6 +106,18 @@ pub enum Expr {
IsNotNull(Box<Expr>),
/// Whether an expression is Null. This expression is never null.
IsNull(Box<Expr>),
+ /// Whether an expression is True. Boolean operation
+ IsTrue(Box<Expr>),
+ /// Whether an expression is False. Boolean operation
+ IsFalse(Box<Expr>),
+ /// Whether an expression is Unknown. Boolean operation
+ IsUnknown(Box<Expr>),
+ /// Whether an expression is not True. Boolean operation
+ IsNotTrue(Box<Expr>),
+ /// Whether an expression is not False. Boolean operation
+ IsNotFalse(Box<Expr>),
+ /// Whether an expression is not Unknown. Boolean operation
+ IsNotUnknown(Box<Expr>),
/// arithmetic negation of an expression, the operand must be of a signed numeric data type
Negative(Box<Expr>),
/// Returns the field of a [`arrow::array::ListArray`] or [`arrow::array::StructArray`] by key
@@ -335,6 +347,12 @@ impl Expr {
Expr::InSubquery { .. } => "InSubquery",
Expr::IsNotNull(..) => "IsNotNull",
Expr::IsNull(..) => "IsNull",
+ Expr::IsTrue(..) => "IsTrue",
+ Expr::IsFalse(..) => "IsFalse",
+ Expr::IsUnknown(..) => "IsUnknown",
+ Expr::IsNotTrue(..) => "IsNotTrue",
+ Expr::IsNotFalse(..) => "IsNotFalse",
+ Expr::IsNotUnknown(..) => "IsNotUnknown",
Expr::Literal(..) => "Literal",
Expr::Negative(..) => "Negative",
Expr::Not(..) => "Not",
@@ -548,6 +566,12 @@ impl fmt::Debug for Expr {
Expr::Negative(expr) => write!(f, "(- {:?})", expr),
Expr::IsNull(expr) => write!(f, "{:?} IS NULL", expr),
Expr::IsNotNull(expr) => write!(f, "{:?} IS NOT NULL", expr),
+ Expr::IsTrue(expr) => write!(f, "{:?} IS TRUE", expr),
+ Expr::IsFalse(expr) => write!(f, "{:?} IS FALSE", expr),
+ Expr::IsUnknown(expr) => write!(f, "{:?} IS UNKNOWN", expr),
+ Expr::IsNotTrue(expr) => write!(f, "{:?} IS NOT TRUE", expr),
+ Expr::IsNotFalse(expr) => write!(f, "{:?} IS NOT FALSE", expr),
+ Expr::IsNotUnknown(expr) => write!(f, "{:?} IS NOT UNKNOWN", expr),
Expr::Exists {
subquery,
negated: true,
@@ -799,6 +823,30 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result<String> {
let expr = create_name(expr, input_schema)?;
Ok(format!("{} IS NOT NULL", expr))
}
+ Expr::IsTrue(expr) => {
+ let expr = create_name(expr, input_schema)?;
+ Ok(format!("{} IS TRUE", expr))
+ }
+ Expr::IsFalse(expr) => {
+ let expr = create_name(expr, input_schema)?;
+ Ok(format!("{} IS FALSE", expr))
+ }
+ Expr::IsUnknown(expr) => {
+ let expr = create_name(expr, input_schema)?;
+ Ok(format!("{} IS UNKNOWN", expr))
+ }
+ Expr::IsNotTrue(expr) => {
+ let expr = create_name(expr, input_schema)?;
+ Ok(format!("{} IS NOT TRUE", expr))
+ }
+ Expr::IsNotFalse(expr) => {
+ let expr = create_name(expr, input_schema)?;
+ Ok(format!("{} IS NOT FALSE", expr))
+ }
+ Expr::IsNotUnknown(expr) => {
+ let expr = create_name(expr, input_schema)?;
+ Ok(format!("{} IS NOT UNKNOWN", expr))
+ }
Expr::Exists { negated: true, .. } => Ok("NOT EXISTS".to_string()),
Expr::Exists { negated: false, .. } => Ok("EXISTS".to_string()),
Expr::InSubquery { negated: true, .. } => Ok("NOT IN".to_string()),
diff --git a/datafusion/expr/src/expr_rewriter.rs b/datafusion/expr/src/expr_rewriter.rs
index 9e8fa8a7e..d2e44f351 100644
--- a/datafusion/expr/src/expr_rewriter.rs
+++ b/datafusion/expr/src/expr_rewriter.rs
@@ -131,6 +131,14 @@ impl ExprRewritable for Expr {
Expr::Not(expr) => Expr::Not(rewrite_boxed(expr, rewriter)?),
Expr::IsNotNull(expr) => Expr::IsNotNull(rewrite_boxed(expr, rewriter)?),
Expr::IsNull(expr) => Expr::IsNull(rewrite_boxed(expr, rewriter)?),
+ Expr::IsTrue(expr) => Expr::IsTrue(rewrite_boxed(expr, rewriter)?),
+ Expr::IsFalse(expr) => Expr::IsFalse(rewrite_boxed(expr, rewriter)?),
+ Expr::IsUnknown(expr) => Expr::IsUnknown(rewrite_boxed(expr, rewriter)?),
+ Expr::IsNotTrue(expr) => Expr::IsNotTrue(rewrite_boxed(expr, rewriter)?),
+ Expr::IsNotFalse(expr) => Expr::IsNotFalse(rewrite_boxed(expr, rewriter)?),
+ Expr::IsNotUnknown(expr) => {
+ Expr::IsNotUnknown(rewrite_boxed(expr, rewriter)?)
+ }
Expr::Negative(expr) => Expr::Negative(rewrite_boxed(expr, rewriter)?),
Expr::Between {
expr,
diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs
index bbb414655..0900a2999 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -104,7 +104,13 @@ impl ExprSchemable for Expr {
| Expr::InSubquery { .. }
| Expr::Between { .. }
| Expr::InList { .. }
- | Expr::IsNotNull(_) => Ok(DataType::Boolean),
+ | Expr::IsNotNull(_)
+ | Expr::IsTrue(_)
+ | Expr::IsFalse(_)
+ | Expr::IsUnknown(_)
+ | Expr::IsNotTrue(_)
+ | Expr::IsNotFalse(_)
+ | Expr::IsNotUnknown(_) => Ok(DataType::Boolean),
Expr::ScalarSubquery(subquery) => {
Ok(subquery.subquery.schema().field(0).data_type().clone())
}
@@ -183,7 +189,15 @@ impl ExprSchemable for Expr {
| Expr::WindowFunction { .. }
| Expr::AggregateFunction { .. }
| Expr::AggregateUDF { .. } => Ok(true),
- Expr::IsNull(_) | Expr::IsNotNull(_) | Expr::Exists { .. } => Ok(false),
+ Expr::IsNull(_)
+ | Expr::IsNotNull(_)
+ | Expr::IsTrue(_)
+ | Expr::IsFalse(_)
+ | Expr::IsUnknown(_)
+ | Expr::IsNotTrue(_)
+ | Expr::IsNotFalse(_)
+ | Expr::IsNotUnknown(_)
+ | Expr::Exists { .. } => Ok(false),
Expr::InSubquery { expr, .. } => expr.nullable(input_schema),
Expr::ScalarSubquery(subquery) => {
Ok(subquery.subquery.schema().field(0).is_nullable())
diff --git a/datafusion/expr/src/expr_visitor.rs b/datafusion/expr/src/expr_visitor.rs
index 162db60a0..3166ced7c 100644
--- a/datafusion/expr/src/expr_visitor.rs
+++ b/datafusion/expr/src/expr_visitor.rs
@@ -97,6 +97,12 @@ impl ExprVisitable for Expr {
Expr::Alias(expr, _)
| Expr::Not(expr)
| Expr::IsNotNull(expr)
+ | Expr::IsTrue(expr)
+ | Expr::IsFalse(expr)
+ | Expr::IsUnknown(expr)
+ | Expr::IsNotTrue(expr)
+ | Expr::IsNotFalse(expr)
+ | Expr::IsNotUnknown(expr)
| Expr::IsNull(expr)
| Expr::Negative(expr)
| Expr::Cast { expr, .. }
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 367c722d2..7d3f78b8f 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -82,6 +82,12 @@ impl ExpressionVisitor for ColumnNameVisitor<'_> {
| Expr::Not(_)
| Expr::IsNotNull(_)
| Expr::IsNull(_)
+ | Expr::IsTrue(_)
+ | Expr::IsFalse(_)
+ | Expr::IsUnknown(_)
+ | Expr::IsNotTrue(_)
+ | Expr::IsNotFalse(_)
+ | Expr::IsNotUnknown(_)
| Expr::Negative(_)
| Expr::Between { .. }
| Expr::Case { .. }
diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs
index 8627b404d..305283d99 100644
--- a/datafusion/optimizer/src/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -414,6 +414,24 @@ impl ExprIdentifierVisitor<'_> {
Expr::IsNull(_) => {
desc.push_str("IsNull-");
}
+ Expr::IsTrue(_) => {
+ desc.push_str("IsTrue-");
+ }
+ Expr::IsFalse(_) => {
+ desc.push_str("IsFalse-");
+ }
+ Expr::IsUnknown(_) => {
+ desc.push_str("IsUnknown-");
+ }
+ Expr::IsNotTrue(_) => {
+ desc.push_str("IsNotTrue-");
+ }
+ Expr::IsNotFalse(_) => {
+ desc.push_str("IsNotFalse-");
+ }
+ Expr::IsNotUnknown(_) => {
+ desc.push_str("IsNotUnknown-");
+ }
Expr::Negative(_) => {
desc.push_str("Negative-");
}
diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs
index 384fd09ae..1735e80ef 100644
--- a/datafusion/optimizer/src/simplify_expressions.rs
+++ b/datafusion/optimizer/src/simplify_expressions.rs
@@ -462,6 +462,12 @@ impl<'a> ConstEvaluator<'a> {
| Expr::Not(_)
| Expr::IsNotNull(_)
| Expr::IsNull(_)
+ | Expr::IsTrue(_)
+ | Expr::IsFalse(_)
+ | Expr::IsUnknown(_)
+ | Expr::IsNotTrue(_)
+ | Expr::IsNotFalse(_)
+ | Expr::IsNotUnknown(_)
| Expr::Negative(_)
| Expr::Between { .. }
| Expr::Case { .. }
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto
index 0b4a43e83..b3ea22f43 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -312,6 +312,13 @@ message LogicalExprNode {
CubeNode cube = 23;
RollupNode rollup = 24;
+
+ IsTrue is_true = 25;
+ IsFalse is_false = 26;
+ IsUnknown is_unknown = 27;
+ IsNotTrue is_not_true = 28;
+ IsNotFalse is_not_false = 29;
+ IsNotUnknown is_not_unknown = 30;
}
}
@@ -346,6 +353,30 @@ message IsNotNull {
LogicalExprNode expr = 1;
}
+message IsTrue {
+ LogicalExprNode expr = 1;
+}
+
+message IsFalse {
+ LogicalExprNode expr = 1;
+}
+
+message IsUnknown {
+ LogicalExprNode expr = 1;
+}
+
+message IsNotTrue {
+ LogicalExprNode expr = 1;
+}
+
+message IsNotFalse {
+ LogicalExprNode expr = 1;
+}
+
+message IsNotUnknown {
+ LogicalExprNode expr = 1;
+}
+
message Not {
LogicalExprNode expr = 1;
}
diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs
index 12f94ce36..91ef29ca0 100644
--- a/datafusion/proto/src/from_proto.rs
+++ b/datafusion/proto/src/from_proto.rs
@@ -908,6 +908,24 @@ pub fn parse_expr(
ExprType::NotExpr(not) => Ok(Expr::Not(Box::new(parse_required_expr(
&not.expr, registry, "expr",
)?))),
+ ExprType::IsTrue(msg) => Ok(Expr::IsTrue(Box::new(parse_required_expr(
+ &msg.expr, registry, "expr",
+ )?))),
+ ExprType::IsFalse(msg) => Ok(Expr::IsFalse(Box::new(parse_required_expr(
+ &msg.expr, registry, "expr",
+ )?))),
+ ExprType::IsUnknown(msg) => Ok(Expr::IsUnknown(Box::new(parse_required_expr(
+ &msg.expr, registry, "expr",
+ )?))),
+ ExprType::IsNotTrue(msg) => Ok(Expr::IsNotTrue(Box::new(parse_required_expr(
+ &msg.expr, registry, "expr",
+ )?))),
+ ExprType::IsNotFalse(msg) => Ok(Expr::IsNotFalse(Box::new(parse_required_expr(
+ &msg.expr, registry, "expr",
+ )?))),
+ ExprType::IsNotUnknown(msg) => Ok(Expr::IsNotUnknown(Box::new(
+ parse_required_expr(&msg.expr, registry, "expr")?,
+ ))),
ExprType::Between(between) => Ok(Expr::Between {
expr: Box::new(parse_required_expr(&between.expr, registry, "expr")?),
negated: between.negated,
diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs
index d3f68b3b4..387582af9 100644
--- a/datafusion/proto/src/to_proto.rs
+++ b/datafusion/proto/src/to_proto.rs
@@ -619,6 +619,54 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode {
expr_type: Some(ExprType::IsNotNullExpr(expr)),
}
}
+ Expr::IsTrue(expr) => {
+ let expr = Box::new(protobuf::IsTrue {
+ expr: Some(Box::new(expr.as_ref().try_into()?)),
+ });
+ Self {
+ expr_type: Some(ExprType::IsTrue(expr)),
+ }
+ }
+ Expr::IsFalse(expr) => {
+ let expr = Box::new(protobuf::IsFalse {
+ expr: Some(Box::new(expr.as_ref().try_into()?)),
+ });
+ Self {
+ expr_type: Some(ExprType::IsFalse(expr)),
+ }
+ }
+ Expr::IsUnknown(expr) => {
+ let expr = Box::new(protobuf::IsUnknown {
+ expr: Some(Box::new(expr.as_ref().try_into()?)),
+ });
+ Self {
+ expr_type: Some(ExprType::IsUnknown(expr)),
+ }
+ }
+ Expr::IsNotTrue(expr) => {
+ let expr = Box::new(protobuf::IsNotTrue {
+ expr: Some(Box::new(expr.as_ref().try_into()?)),
+ });
+ Self {
+ expr_type: Some(ExprType::IsNotTrue(expr)),
+ }
+ }
+ Expr::IsNotFalse(expr) => {
+ let expr = Box::new(protobuf::IsNotFalse {
+ expr: Some(Box::new(expr.as_ref().try_into()?)),
+ });
+ Self {
+ expr_type: Some(ExprType::IsNotFalse(expr)),
+ }
+ }
+ Expr::IsNotUnknown(expr) => {
+ let expr = Box::new(protobuf::IsNotUnknown {
+ expr: Some(Box::new(expr.as_ref().try_into()?)),
+ });
+ Self {
+ expr_type: Some(ExprType::IsNotUnknown(expr)),
+ }
+ }
Expr::Between {
expr,
negated,
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index 81ea34de1..ebe850bdf 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -286,6 +286,24 @@ where
Expr::IsNull(nested_expr) => Ok(Expr::IsNull(Box::new(
clone_with_replacement(nested_expr, replacement_fn)?,
))),
+ Expr::IsTrue(nested_expr) => Ok(Expr::IsTrue(Box::new(
+ clone_with_replacement(nested_expr, replacement_fn)?,
+ ))),
+ Expr::IsFalse(nested_expr) => Ok(Expr::IsFalse(Box::new(
+ clone_with_replacement(nested_expr, replacement_fn)?,
+ ))),
+ Expr::IsUnknown(nested_expr) => Ok(Expr::IsUnknown(Box::new(
+ clone_with_replacement(nested_expr, replacement_fn)?,
+ ))),
+ Expr::IsNotTrue(nested_expr) => Ok(Expr::IsNotTrue(Box::new(
+ clone_with_replacement(nested_expr, replacement_fn)?,
+ ))),
+ Expr::IsNotFalse(nested_expr) => Ok(Expr::IsNotFalse(Box::new(
+ clone_with_replacement(nested_expr, replacement_fn)?,
+ ))),
+ Expr::IsNotUnknown(nested_expr) => Ok(Expr::IsNotUnknown(Box::new(
+ clone_with_replacement(nested_expr, replacement_fn)?,
+ ))),
Expr::Cast {
expr: nested_expr,
data_type,