This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new c96f03e79 Add SQL planner support for `Like`, `ILike` and `SimilarTo`,
with optional escape character (#3101)
c96f03e79 is described below
commit c96f03e7921580d931ef9a96b79f3dfb87be06c3
Author: Andy Grove <[email protected]>
AuthorDate: Fri Sep 9 08:34:20 2022 -0600
Add SQL planner support for `Like`, `ILike` and `SimilarTo`, with optional
escape character (#3101)
* Make Like a top-level Expr
* revert some changes
* add type validation
* Revert physical plan changes and reduce scope of the PR
* Revert more changes
* Revert more changes
* clippy
* address feedback
* revert change to test
* revert more changes
---
datafusion/physical-expr/src/planner.rs | 20 +++++++++++
datafusion/proto/src/lib.rs | 54 ++++++++++++++++++++++++++++
datafusion/sql/src/planner.rs | 64 ++++++++++++++++++++++-----------
3 files changed, 117 insertions(+), 21 deletions(-)
diff --git a/datafusion/physical-expr/src/planner.rs
b/datafusion/physical-expr/src/planner.rs
index 9a3ad2a8d..7e31dea0a 100644
--- a/datafusion/physical-expr/src/planner.rs
+++ b/datafusion/physical-expr/src/planner.rs
@@ -203,6 +203,26 @@ pub fn create_physical_expr(
}
}
}
+ Expr::Like {
+ negated,
+ expr,
+ pattern,
+ escape_char,
+ } => {
+ if escape_char.is_some() {
+ return Err(DataFusionError::Execution(
+ "LIKE does not support escape_char".to_string(),
+ ));
+ }
+ let op = if *negated {
+ Operator::NotLike
+ } else {
+ Operator::Like
+ };
+ let bin_expr =
+ binary_expr(expr.as_ref().clone(), op,
pattern.as_ref().clone());
+ create_physical_expr(&bin_expr, input_dfschema, input_schema,
execution_props)
+ }
Expr::Case {
expr,
when_then_expr,
diff --git a/datafusion/proto/src/lib.rs b/datafusion/proto/src/lib.rs
index 7e009a846..7c8a76074 100644
--- a/datafusion/proto/src/lib.rs
+++ b/datafusion/proto/src/lib.rs
@@ -929,6 +929,60 @@ mod roundtrip_tests {
roundtrip_expr_test(test_expr, ctx);
}
+ #[test]
+ fn roundtrip_like() {
+ fn like(negated: bool, escape_char: Option<char>) {
+ let test_expr = Expr::Like {
+ negated,
+ expr: Box::new(col("col")),
+ pattern: Box::new(lit("[0-9]+")),
+ escape_char,
+ };
+ let ctx = SessionContext::new();
+ roundtrip_expr_test(test_expr, ctx);
+ }
+ like(true, Some('X'));
+ like(false, Some('\\'));
+ like(true, None);
+ like(false, None);
+ }
+
+ #[test]
+ fn roundtrip_ilike() {
+ fn ilike(negated: bool, escape_char: Option<char>) {
+ let test_expr = Expr::ILike {
+ negated,
+ expr: Box::new(col("col")),
+ pattern: Box::new(lit("[0-9]+")),
+ escape_char,
+ };
+ let ctx = SessionContext::new();
+ roundtrip_expr_test(test_expr, ctx);
+ }
+ ilike(true, Some('X'));
+ ilike(false, Some('\\'));
+ ilike(true, None);
+ ilike(false, None);
+ }
+
+ #[test]
+ fn roundtrip_similar_to() {
+ fn similar_to(negated: bool, escape_char: Option<char>) {
+ let test_expr = Expr::SimilarTo {
+ negated,
+ expr: Box::new(col("col")),
+ pattern: Box::new(lit("[0-9]+")),
+ escape_char,
+ };
+ let ctx = SessionContext::new();
+ roundtrip_expr_test(test_expr, ctx);
+ }
+ similar_to(true, Some('X'));
+ similar_to(false, Some('\\'));
+ similar_to(true, None);
+ similar_to(false, None);
+ }
+
#[test]
fn roundtrip_count() {
let test_expr = Expr::AggregateFunction {
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index d9dbdf143..dd44459f5 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -35,8 +35,8 @@ use datafusion_expr::utils::{
COUNT_STAR_EXPANSION,
};
use datafusion_expr::{
- and, col, lit, AggregateFunction, AggregateUDF, Expr, Operator, ScalarUDF,
- WindowFrame, WindowFrameUnits,
+ and, col, lit, AggregateFunction, AggregateUDF, Expr, ExprSchemable,
Operator,
+ ScalarUDF, WindowFrame, WindowFrameUnits,
};
use datafusion_expr::{
window_function::WindowFunction, BuiltinScalarFunction, TableSource,
@@ -1939,30 +1939,52 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}
SQLExpr::Like { negated, expr, pattern, escape_char } => {
- match escape_char {
- Some(_) => {
- // to support this we will need to introduce
`Expr::Like` instead
- // of treating it like a binary expression
- Err(DataFusionError::NotImplemented("LIKE with ESCAPE
is not yet supported".to_string()))
- },
- _ => {
- Ok(Expr::BinaryExpr {
- left:
Box::new(self.sql_expr_to_logical_expr(*expr, schema, ctes)?),
- op: if negated { Operator::NotLike } else {
Operator::Like },
- right:
Box::new(self.sql_expr_to_logical_expr(*pattern, schema, ctes)?),
- })
- }
+ let pattern = self.sql_expr_to_logical_expr(*pattern, schema,
ctes)?;
+ let pattern_type = pattern.get_type(schema)?;
+ if pattern_type != DataType::Utf8 && pattern_type !=
DataType::Null {
+ return Err(DataFusionError::Plan(
+ "Invalid pattern in LIKE expression".to_string(),
+ ));
}
+ Ok(Expr::Like {
+ negated,
+ expr: Box::new(self.sql_expr_to_logical_expr(*expr,
schema, ctes)?),
+ pattern: Box::new(pattern),
+ escape_char
+
+ })
}
- SQLExpr::ILike { .. } => {
- // https://github.com/apache/arrow-datafusion/issues/3099
- Err(DataFusionError::NotImplemented("ILIKE is not yet
supported".to_string()))
+ SQLExpr::ILike { negated, expr, pattern, escape_char } => {
+ let pattern = self.sql_expr_to_logical_expr(*pattern, schema,
ctes)?;
+ let pattern_type = pattern.get_type(schema)?;
+ if pattern_type != DataType::Utf8 && pattern_type !=
DataType::Null {
+ return Err(DataFusionError::Plan(
+ "Invalid pattern in ILIKE expression".to_string(),
+ ));
+ }
+ Ok(Expr::ILike {
+ negated,
+ expr: Box::new(self.sql_expr_to_logical_expr(*expr,
schema, ctes)?),
+ pattern: Box::new(pattern),
+ escape_char
+ })
}
- SQLExpr::SimilarTo { .. } => {
- // https://github.com/apache/arrow-datafusion/issues/3099
- Err(DataFusionError::NotImplemented("SIMILAR TO is not yet
supported".to_string()))
+ SQLExpr::SimilarTo { negated, expr, pattern, escape_char } => {
+ let pattern = self.sql_expr_to_logical_expr(*pattern, schema,
ctes)?;
+ let pattern_type = pattern.get_type(schema)?;
+ if pattern_type != DataType::Utf8 && pattern_type !=
DataType::Null {
+ return Err(DataFusionError::Plan(
+ "Invalid pattern in SIMILAR TO expression".to_string(),
+ ));
+ }
+ Ok(Expr::SimilarTo {
+ negated,
+ expr: Box::new(self.sql_expr_to_logical_expr(*expr,
schema, ctes)?),
+ pattern: Box::new(pattern),
+ escape_char
+ })
}
SQLExpr::BinaryOp {