This is an automated email from the ASF dual-hosted git repository.
houqp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 01b5244 refine match pattern related code (#1650)
01b5244 is described below
commit 01b5244cb030e8285d0c54db760c56e6d5da83ba
Author: xudong.w <[email protected]>
AuthorDate: Mon Jan 24 03:09:28 2022 +0800
refine match pattern related code (#1650)
---
datafusion/src/logical_plan/expr.rs | 90 +++++++++++-------------
datafusion/src/optimizer/simplify_expressions.rs | 41 ++++++-----
datafusion/src/optimizer/utils.rs | 78 ++++++++++----------
3 files changed, 100 insertions(+), 109 deletions(-)
diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs
index fb4f20c..98c2969 100644
--- a/datafusion/src/logical_plan/expr.rs
+++ b/datafusion/src/logical_plan/expr.rs
@@ -400,13 +400,16 @@ impl Expr {
/// the expression is incorrectly typed (e.g. `[utf8] + [bool]`).
pub fn get_type(&self, schema: &DFSchema) -> Result<DataType> {
match self {
- Expr::Alias(expr, _) => expr.get_type(schema),
+ Expr::Alias(expr, _) | Expr::Sort { expr, .. } |
Expr::Negative(expr) => {
+ expr.get_type(schema)
+ }
Expr::Column(c) =>
Ok(schema.field_from_column(c)?.data_type().clone()),
Expr::ScalarVariable(_) => Ok(DataType::Utf8),
Expr::Literal(l) => Ok(l.get_datatype()),
Expr::Case { when_then_expr, .. } =>
when_then_expr[0].1.get_type(schema),
- Expr::Cast { data_type, .. } => Ok(data_type.clone()),
- Expr::TryCast { data_type, .. } => Ok(data_type.clone()),
+ Expr::Cast { data_type, .. } | Expr::TryCast { data_type, .. } => {
+ Ok(data_type.clone())
+ }
Expr::ScalarUDF { fun, args } => {
let data_types = args
.iter()
@@ -442,10 +445,11 @@ impl Expr {
.collect::<Result<Vec<_>>>()?;
Ok((fun.return_type)(&data_types)?.as_ref().clone())
}
- Expr::Not(_) => Ok(DataType::Boolean),
- Expr::Negative(expr) => expr.get_type(schema),
- Expr::IsNull(_) => Ok(DataType::Boolean),
- Expr::IsNotNull(_) => Ok(DataType::Boolean),
+ Expr::Not(_)
+ | Expr::IsNull(_)
+ | Expr::Between { .. }
+ | Expr::InList { .. }
+ | Expr::IsNotNull(_) => Ok(DataType::Boolean),
Expr::BinaryExpr {
ref left,
ref right,
@@ -455,9 +459,6 @@ impl Expr {
op,
&right.get_type(schema)?,
),
- Expr::Sort { ref expr, .. } => expr.get_type(schema),
- Expr::Between { .. } => Ok(DataType::Boolean),
- Expr::InList { .. } => Ok(DataType::Boolean),
Expr::Wildcard => Err(DataFusionError::Internal(
"Wildcard expressions are not valid in a logical query
plan".to_owned(),
)),
@@ -477,10 +478,14 @@ impl Expr {
/// This happens when the expression refers to a column that does not exist in the schema.
pub fn nullable(&self, input_schema: &DFSchema) -> Result<bool> {
match self {
- Expr::Alias(expr, _) => expr.nullable(input_schema),
+ Expr::Alias(expr, _)
+ | Expr::Not(expr)
+ | Expr::Negative(expr)
+ | Expr::Sort { expr, .. }
+ | Expr::Between { expr, .. }
+ | Expr::InList { expr, .. } => expr.nullable(input_schema),
Expr::Column(c) =>
Ok(input_schema.field_from_column(c)?.is_nullable()),
Expr::Literal(value) => Ok(value.is_null()),
- Expr::ScalarVariable(_) => Ok(true),
Expr::Case {
when_then_expr,
else_expr,
@@ -500,24 +505,19 @@ impl Expr {
}
}
Expr::Cast { expr, .. } => expr.nullable(input_schema),
- Expr::TryCast { .. } => Ok(true),
- Expr::ScalarFunction { .. } => Ok(true),
- Expr::ScalarUDF { .. } => Ok(true),
- Expr::WindowFunction { .. } => Ok(true),
- Expr::AggregateFunction { .. } => Ok(true),
- Expr::AggregateUDF { .. } => Ok(true),
- Expr::Not(expr) => expr.nullable(input_schema),
- Expr::Negative(expr) => expr.nullable(input_schema),
- Expr::IsNull(_) => Ok(false),
- Expr::IsNotNull(_) => Ok(false),
+ Expr::ScalarVariable(_)
+ | Expr::TryCast { .. }
+ | Expr::ScalarFunction { .. }
+ | Expr::ScalarUDF { .. }
+ | Expr::WindowFunction { .. }
+ | Expr::AggregateFunction { .. }
+ | Expr::AggregateUDF { .. } => Ok(true),
+ Expr::IsNull(_) | Expr::IsNotNull(_) => Ok(false),
Expr::BinaryExpr {
ref left,
ref right,
..
} => Ok(left.nullable(input_schema)? ||
right.nullable(input_schema)?),
- Expr::Sort { ref expr, .. } => expr.nullable(input_schema),
- Expr::Between { ref expr, .. } => expr.nullable(input_schema),
- Expr::InList { ref expr, .. } => expr.nullable(input_schema),
Expr::Wildcard => Err(DataFusionError::Internal(
"Wildcard expressions are not valid in a logical query
plan".to_owned(),
)),
@@ -723,18 +723,23 @@ impl Expr {
// recurse (and cover all expression types)
let visitor = match self {
- Expr::Alias(expr, _) => expr.accept(visitor),
- Expr::Column(_) => Ok(visitor),
- Expr::ScalarVariable(..) => Ok(visitor),
- Expr::Literal(..) => Ok(visitor),
+ Expr::Alias(expr, _)
+ | Expr::Not(expr)
+ | Expr::IsNotNull(expr)
+ | Expr::IsNull(expr)
+ | Expr::Negative(expr)
+ | Expr::Cast { expr, .. }
+ | Expr::TryCast { expr, .. }
+ | Expr::Sort { expr, .. }
+ | Expr::GetIndexedField { expr, .. } => expr.accept(visitor),
+ Expr::Column(_)
+ | Expr::ScalarVariable(_)
+ | Expr::Literal(_)
+ | Expr::Wildcard => Ok(visitor),
Expr::BinaryExpr { left, right, .. } => {
let visitor = left.accept(visitor)?;
right.accept(visitor)
}
- Expr::Not(expr) => expr.accept(visitor),
- Expr::IsNotNull(expr) => expr.accept(visitor),
- Expr::IsNull(expr) => expr.accept(visitor),
- Expr::Negative(expr) => expr.accept(visitor),
Expr::Between {
expr, low, high, ..
} => {
@@ -765,13 +770,10 @@ impl Expr {
Ok(visitor)
}
}
- Expr::Cast { expr, .. } => expr.accept(visitor),
- Expr::TryCast { expr, .. } => expr.accept(visitor),
- Expr::Sort { expr, .. } => expr.accept(visitor),
- Expr::ScalarFunction { args, .. } => args
- .iter()
- .try_fold(visitor, |visitor, arg| arg.accept(visitor)),
- Expr::ScalarUDF { args, .. } => args
+ Expr::ScalarFunction { args, .. }
+ | Expr::ScalarUDF { args, .. }
+ | Expr::AggregateFunction { args, .. }
+ | Expr::AggregateUDF { args, .. } => args
.iter()
.try_fold(visitor, |visitor, arg| arg.accept(visitor)),
Expr::WindowFunction {
@@ -791,19 +793,11 @@ impl Expr {
.try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
Ok(visitor)
}
- Expr::AggregateFunction { args, .. } => args
- .iter()
- .try_fold(visitor, |visitor, arg| arg.accept(visitor)),
- Expr::AggregateUDF { args, .. } => args
- .iter()
- .try_fold(visitor, |visitor, arg| arg.accept(visitor)),
Expr::InList { expr, list, .. } => {
let visitor = expr.accept(visitor)?;
list.iter()
.try_fold(visitor, |visitor, arg| arg.accept(visitor))
}
- Expr::Wildcard => Ok(visitor),
- Expr::GetIndexedField { ref expr, .. } => expr.accept(visitor),
}?;
visitor.post_visit(self)
diff --git a/datafusion/src/optimizer/simplify_expressions.rs b/datafusion/src/optimizer/simplify_expressions.rs
index 653c613..7127a8f 100644
--- a/datafusion/src/optimizer/simplify_expressions.rs
+++ b/datafusion/src/optimizer/simplify_expressions.rs
@@ -333,29 +333,28 @@ impl ConstEvaluator {
// at plan time
match expr {
// Has no runtime cost, but needed during planning
- Expr::Alias(..) => false,
- Expr::AggregateFunction { .. } => false,
- Expr::AggregateUDF { .. } => false,
- Expr::ScalarVariable(_) => false,
- Expr::Column(_) => false,
+ Expr::Alias(..)
+ | Expr::AggregateFunction { .. }
+ | Expr::AggregateUDF { .. }
+ | Expr::ScalarVariable(_)
+ | Expr::Column(_)
+ | Expr::WindowFunction { .. }
+ | Expr::Sort { .. }
+ | Expr::Wildcard => false,
Expr::ScalarFunction { fun, .. } =>
Self::volatility_ok(fun.volatility()),
Expr::ScalarUDF { fun, .. } =>
Self::volatility_ok(fun.signature.volatility),
- Expr::WindowFunction { .. } => false,
- Expr::Sort { .. } => false,
- Expr::Wildcard => false,
-
- Expr::Literal(_) => true,
- Expr::BinaryExpr { .. } => true,
- Expr::Not(_) => true,
- Expr::IsNotNull(_) => true,
- Expr::IsNull(_) => true,
- Expr::Negative(_) => true,
- Expr::Between { .. } => true,
- Expr::Case { .. } => true,
- Expr::Cast { .. } => true,
- Expr::TryCast { .. } => true,
- Expr::InList { .. } => true,
- Expr::GetIndexedField { .. } => true,
+ Expr::Literal(_)
+ | Expr::BinaryExpr { .. }
+ | Expr::Not(_)
+ | Expr::IsNotNull(_)
+ | Expr::IsNull(_)
+ | Expr::Negative(_)
+ | Expr::Between { .. }
+ | Expr::Case { .. }
+ | Expr::Cast { .. }
+ | Expr::TryCast { .. }
+ | Expr::InList { .. }
+ | Expr::GetIndexedField { .. } => true,
}
}
diff --git a/datafusion/src/optimizer/utils.rs b/datafusion/src/optimizer/utils.rs
index 541ac67..f7ab836 100644
--- a/datafusion/src/optimizer/utils.rs
+++ b/datafusion/src/optimizer/utils.rs
@@ -63,26 +63,26 @@ impl ExpressionVisitor for ColumnNameVisitor<'_> {
Expr::ScalarVariable(var_names) => {
self.accum.insert(Column::from_name(var_names.join(".")));
}
- Expr::Alias(_, _) => {}
- Expr::Literal(_) => {}
- Expr::BinaryExpr { .. } => {}
- Expr::Not(_) => {}
- Expr::IsNotNull(_) => {}
- Expr::IsNull(_) => {}
- Expr::Negative(_) => {}
- Expr::Between { .. } => {}
- Expr::Case { .. } => {}
- Expr::Cast { .. } => {}
- Expr::TryCast { .. } => {}
- Expr::Sort { .. } => {}
- Expr::ScalarFunction { .. } => {}
- Expr::ScalarUDF { .. } => {}
- Expr::WindowFunction { .. } => {}
- Expr::AggregateFunction { .. } => {}
- Expr::AggregateUDF { .. } => {}
- Expr::InList { .. } => {}
- Expr::Wildcard => {}
- Expr::GetIndexedField { .. } => {}
+ Expr::Alias(_, _)
+ | Expr::Literal(_)
+ | Expr::BinaryExpr { .. }
+ | Expr::Not(_)
+ | Expr::IsNotNull(_)
+ | Expr::IsNull(_)
+ | Expr::Negative(_)
+ | Expr::Between { .. }
+ | Expr::Case { .. }
+ | Expr::Cast { .. }
+ | Expr::TryCast { .. }
+ | Expr::Sort { .. }
+ | Expr::ScalarFunction { .. }
+ | Expr::ScalarUDF { .. }
+ | Expr::WindowFunction { .. }
+ | Expr::AggregateFunction { .. }
+ | Expr::AggregateUDF { .. }
+ | Expr::InList { .. }
+ | Expr::Wildcard
+ | Expr::GetIndexedField { .. } => {}
}
Ok(Recursion::Continue(self))
}
@@ -281,10 +281,19 @@ pub fn expr_sub_expressions(expr: &Expr) -> Result<Vec<Expr>> {
Expr::BinaryExpr { left, right, .. } => {
Ok(vec![left.as_ref().to_owned(), right.as_ref().to_owned()])
}
- Expr::IsNull(e) => Ok(vec![e.as_ref().to_owned()]),
- Expr::IsNotNull(e) => Ok(vec![e.as_ref().to_owned()]),
- Expr::ScalarFunction { args, .. } => Ok(args.clone()),
- Expr::ScalarUDF { args, .. } => Ok(args.clone()),
+ Expr::IsNull(expr)
+ | Expr::IsNotNull(expr)
+ | Expr::Cast { expr, .. }
+ | Expr::TryCast { expr, .. }
+ | Expr::Alias(expr, ..)
+ | Expr::Not(expr)
+ | Expr::Negative(expr)
+ | Expr::Sort { expr, .. }
+ | Expr::GetIndexedField { expr, .. } =>
Ok(vec![expr.as_ref().to_owned()]),
+ Expr::ScalarFunction { args, .. }
+ | Expr::ScalarUDF { args, .. }
+ | Expr::AggregateFunction { args, .. }
+ | Expr::AggregateUDF { args, .. } => Ok(args.clone()),
Expr::WindowFunction {
args,
partition_by,
@@ -299,8 +308,6 @@ pub fn expr_sub_expressions(expr: &Expr) -> Result<Vec<Expr>> {
expr_list.extend(order_by.clone());
Ok(expr_list)
}
- Expr::AggregateFunction { args, .. } => Ok(args.clone()),
- Expr::AggregateUDF { args, .. } => Ok(args.clone()),
Expr::Case {
expr,
when_then_expr,
@@ -322,15 +329,7 @@ pub fn expr_sub_expressions(expr: &Expr) -> Result<Vec<Expr>> {
}
Ok(expr_list)
}
- Expr::Cast { expr, .. } => Ok(vec![expr.as_ref().to_owned()]),
- Expr::TryCast { expr, .. } => Ok(vec![expr.as_ref().to_owned()]),
- Expr::Column(_) => Ok(vec![]),
- Expr::Alias(expr, ..) => Ok(vec![expr.as_ref().to_owned()]),
- Expr::Literal(_) => Ok(vec![]),
- Expr::ScalarVariable(_) => Ok(vec![]),
- Expr::Not(expr) => Ok(vec![expr.as_ref().to_owned()]),
- Expr::Negative(expr) => Ok(vec![expr.as_ref().to_owned()]),
- Expr::Sort { expr, .. } => Ok(vec![expr.as_ref().to_owned()]),
+ Expr::Column(_) | Expr::Literal(_) | Expr::ScalarVariable(_) =>
Ok(vec![]),
Expr::Between {
expr, low, high, ..
} => Ok(vec![
@@ -348,7 +347,6 @@ pub fn expr_sub_expressions(expr: &Expr) -> Result<Vec<Expr>> {
Expr::Wildcard { .. } => Err(DataFusionError::Internal(
"Wildcard expressions are not valid in a logical query
plan".to_owned(),
)),
- Expr::GetIndexedField { expr, .. } =>
Ok(vec![expr.as_ref().to_owned()]),
}
}
@@ -473,9 +471,10 @@ pub fn rewrite_expression(expr: &Expr, expressions: &[Expr]) -> Result<Expr> {
}
Expr::Not(_) => Ok(Expr::Not(Box::new(expressions[0].clone()))),
Expr::Negative(_) =>
Ok(Expr::Negative(Box::new(expressions[0].clone()))),
- Expr::Column(_) => Ok(expr.clone()),
- Expr::Literal(_) => Ok(expr.clone()),
- Expr::ScalarVariable(_) => Ok(expr.clone()),
+ Expr::Column(_)
+ | Expr::Literal(_)
+ | Expr::InList { .. }
+ | Expr::ScalarVariable(_) => Ok(expr.clone()),
Expr::Sort {
asc, nulls_first, ..
} => Ok(Expr::Sort {
@@ -504,7 +503,6 @@ pub fn rewrite_expression(expr: &Expr, expressions: &[Expr]) -> Result<Expr> {
Ok(expr)
}
}
- Expr::InList { .. } => Ok(expr.clone()),
Expr::Wildcard { .. } => Err(DataFusionError::Internal(
"Wildcard expressions are not valid in a logical query
plan".to_owned(),
)),