This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new bd38142de4 Add support for Postgres LIKE operators (#8894)
bd38142de4 is described below
commit bd38142de4da21fb7fefa1db997fa5c7eb95ffcc
Author: Marko Grujic <[email protected]>
AuthorDate: Fri Jan 26 12:02:15 2024 +0100
Add support for Postgres LIKE operators (#8894)
* Add support for PG LIKE operators
* Bump sqlparser dep from branch to merge commit
---
datafusion/expr/src/operator.rs | 24 +++++
datafusion/expr/src/type_coercion/binary.rs | 7 ++
datafusion/physical-expr/src/expressions/binary.rs | 105 ++++++++++++++++++++-
datafusion/sql/src/expr/binary_op.rs | 4 +
datafusion/sqllogictest/test_files/predicates.slt | 24 +++++
datafusion/substrait/src/logical_plan/producer.rs | 4 +
docs/source/user-guide/sql/operators.md | 52 ++++++++++
7 files changed, 218 insertions(+), 2 deletions(-)
diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr/src/operator.rs
index 57888a11d4..a10312e234 100644
--- a/datafusion/expr/src/operator.rs
+++ b/datafusion/expr/src/operator.rs
@@ -69,6 +69,14 @@ pub enum Operator {
RegexNotMatch,
/// Case insensitive regex not match
RegexNotIMatch,
+ /// Case sensitive pattern match
+ LikeMatch,
+ /// Case insensitive pattern match
+ ILikeMatch,
+ /// Case sensitive pattern not match
+ NotLikeMatch,
+ /// Case insensitive pattern not match
+ NotILikeMatch,
/// Bitwise and, like `&`
BitwiseAnd,
/// Bitwise or, like `|`
@@ -100,6 +108,10 @@ impl Operator {
Operator::GtEq => Some(Operator::Lt),
Operator::IsDistinctFrom => Some(Operator::IsNotDistinctFrom),
Operator::IsNotDistinctFrom => Some(Operator::IsDistinctFrom),
+ Operator::LikeMatch => Some(Operator::NotLikeMatch),
+ Operator::ILikeMatch => Some(Operator::NotILikeMatch),
+ Operator::NotLikeMatch => Some(Operator::LikeMatch),
+ Operator::NotILikeMatch => Some(Operator::ILikeMatch),
Operator::Plus
| Operator::Minus
| Operator::Multiply
@@ -192,6 +204,10 @@ impl Operator {
| Operator::RegexIMatch
| Operator::RegexNotMatch
| Operator::RegexNotIMatch
+ | Operator::LikeMatch
+ | Operator::ILikeMatch
+ | Operator::NotLikeMatch
+ | Operator::NotILikeMatch
| Operator::BitwiseAnd
| Operator::BitwiseOr
| Operator::BitwiseXor
@@ -221,6 +237,10 @@ impl Operator {
| Operator::RegexNotMatch
| Operator::RegexIMatch
| Operator::RegexNotIMatch
+ | Operator::LikeMatch
+ | Operator::ILikeMatch
+ | Operator::NotLikeMatch
+ | Operator::NotILikeMatch
| Operator::BitwiseAnd
| Operator::BitwiseOr
| Operator::BitwiseShiftLeft
@@ -253,6 +273,10 @@ impl fmt::Display for Operator {
Operator::RegexIMatch => "~*",
Operator::RegexNotMatch => "!~",
Operator::RegexNotIMatch => "!~*",
+ Operator::LikeMatch => "~~",
+ Operator::ILikeMatch => "~~*",
+ Operator::NotLikeMatch => "!~~",
+ Operator::NotILikeMatch => "!~~*",
Operator::IsDistinctFrom => "IS DISTINCT FROM",
Operator::IsNotDistinctFrom => "IS NOT DISTINCT FROM",
Operator::BitwiseAnd => "&",
diff --git a/datafusion/expr/src/type_coercion/binary.rs
b/datafusion/expr/src/type_coercion/binary.rs
index 6bacc18700..70015c6992 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -101,6 +101,13 @@ fn signature(lhs: &DataType, op: &Operator, rhs:
&DataType) -> Result<Signature>
)
})
}
+ LikeMatch | ILikeMatch | NotLikeMatch | NotILikeMatch => {
+ regex_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| {
+ plan_datafusion_err!(
+ "Cannot infer common argument type for regex operation
{lhs} {op} {rhs}"
+ )
+ })
+ }
BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseShiftRight |
BitwiseShiftLeft => {
bitwise_coercion(lhs, rhs).map(Signature::uniform).ok_or_else(|| {
plan_datafusion_err!(
diff --git a/datafusion/physical-expr/src/expressions/binary.rs
b/datafusion/physical-expr/src/expressions/binary.rs
index 8c4078dbce..3f13030092 100644
--- a/datafusion/physical-expr/src/expressions/binary.rs
+++ b/datafusion/physical-expr/src/expressions/binary.rs
@@ -28,12 +28,12 @@ use crate::sort_properties::SortProperties;
use crate::PhysicalExpr;
use arrow::array::*;
-use arrow::compute::cast;
use arrow::compute::kernels::boolean::{and_kleene, not, or_kleene};
use arrow::compute::kernels::cmp::*;
use arrow::compute::kernels::comparison::regexp_is_match_utf8;
use arrow::compute::kernels::comparison::regexp_is_match_utf8_scalar;
use arrow::compute::kernels::concat_elements::concat_elements_utf8;
+use arrow::compute::{cast, ilike, like, nilike, nlike};
use arrow::datatypes::*;
use arrow::record_batch::RecordBatch;
@@ -281,6 +281,10 @@ impl PhysicalExpr for BinaryExpr {
Operator::GtEq => return apply_cmp(&lhs, &rhs, gt_eq),
Operator::IsDistinctFrom => return apply_cmp(&lhs, &rhs, distinct),
Operator::IsNotDistinctFrom => return apply_cmp(&lhs, &rhs,
not_distinct),
+ Operator::LikeMatch => return apply_cmp(&lhs, &rhs, like),
+ Operator::ILikeMatch => return apply_cmp(&lhs, &rhs, ilike),
+ Operator::NotLikeMatch => return apply_cmp(&lhs, &rhs, nlike),
+ Operator::NotILikeMatch => return apply_cmp(&lhs, &rhs, nilike),
_ => {}
}
@@ -554,7 +558,8 @@ impl BinaryExpr {
use Operator::*;
match &self.op {
IsDistinctFrom | IsNotDistinctFrom | Lt | LtEq | Gt | GtEq | Eq |
NotEq
- | Plus | Minus | Multiply | Divide | Modulo => unreachable!(),
+ | Plus | Minus | Multiply | Divide | Modulo | LikeMatch |
ILikeMatch
+ | NotLikeMatch | NotILikeMatch => unreachable!(),
And => {
if left_data_type == &DataType::Boolean {
boolean_op!(&left, &right, and_kleene)
@@ -970,6 +975,102 @@ mod tests {
DataType::Boolean,
[false, false, false, false, true],
);
+ test_coercion!(
+ StringArray,
+ DataType::Utf8,
+ vec!["abc"; 5],
+ StringArray,
+ DataType::Utf8,
+ vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+ Operator::LikeMatch,
+ BooleanArray,
+ DataType::Boolean,
+ [true, false, false, true, false],
+ );
+ test_coercion!(
+ StringArray,
+ DataType::Utf8,
+ vec!["abc"; 5],
+ StringArray,
+ DataType::Utf8,
+ vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+ Operator::ILikeMatch,
+ BooleanArray,
+ DataType::Boolean,
+ [true, true, false, true, true],
+ );
+ test_coercion!(
+ StringArray,
+ DataType::Utf8,
+ vec!["abc"; 5],
+ StringArray,
+ DataType::Utf8,
+ vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+ Operator::NotLikeMatch,
+ BooleanArray,
+ DataType::Boolean,
+ [false, true, true, false, true],
+ );
+ test_coercion!(
+ StringArray,
+ DataType::Utf8,
+ vec!["abc"; 5],
+ StringArray,
+ DataType::Utf8,
+ vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+ Operator::NotILikeMatch,
+ BooleanArray,
+ DataType::Boolean,
+ [false, false, true, false, false],
+ );
+ test_coercion!(
+ LargeStringArray,
+ DataType::LargeUtf8,
+ vec!["abc"; 5],
+ LargeStringArray,
+ DataType::LargeUtf8,
+ vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+ Operator::LikeMatch,
+ BooleanArray,
+ DataType::Boolean,
+ [true, false, false, true, false],
+ );
+ test_coercion!(
+ LargeStringArray,
+ DataType::LargeUtf8,
+ vec!["abc"; 5],
+ LargeStringArray,
+ DataType::LargeUtf8,
+ vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+ Operator::ILikeMatch,
+ BooleanArray,
+ DataType::Boolean,
+ [true, true, false, true, true],
+ );
+ test_coercion!(
+ LargeStringArray,
+ DataType::LargeUtf8,
+ vec!["abc"; 5],
+ LargeStringArray,
+ DataType::LargeUtf8,
+ vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+ Operator::NotLikeMatch,
+ BooleanArray,
+ DataType::Boolean,
+ [false, true, true, false, true],
+ );
+ test_coercion!(
+ LargeStringArray,
+ DataType::LargeUtf8,
+ vec!["abc"; 5],
+ LargeStringArray,
+ DataType::LargeUtf8,
+ vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+ Operator::NotILikeMatch,
+ BooleanArray,
+ DataType::Boolean,
+ [false, false, true, false, false],
+ );
test_coercion!(
Int16Array,
DataType::Int16,
diff --git a/datafusion/sql/src/expr/binary_op.rs
b/datafusion/sql/src/expr/binary_op.rs
index d9c85663e5..78efaca099 100644
--- a/datafusion/sql/src/expr/binary_op.rs
+++ b/datafusion/sql/src/expr/binary_op.rs
@@ -40,6 +40,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
BinaryOperator::PGRegexIMatch => Ok(Operator::RegexIMatch),
BinaryOperator::PGRegexNotMatch => Ok(Operator::RegexNotMatch),
BinaryOperator::PGRegexNotIMatch => Ok(Operator::RegexNotIMatch),
+ BinaryOperator::PGLikeMatch => Ok(Operator::LikeMatch),
+ BinaryOperator::PGILikeMatch => Ok(Operator::ILikeMatch),
+ BinaryOperator::PGNotLikeMatch => Ok(Operator::NotLikeMatch),
+ BinaryOperator::PGNotILikeMatch => Ok(Operator::NotILikeMatch),
BinaryOperator::BitwiseAnd => Ok(Operator::BitwiseAnd),
BinaryOperator::BitwiseOr => Ok(Operator::BitwiseOr),
BinaryOperator::BitwiseXor => Ok(Operator::BitwiseXor),
diff --git a/datafusion/sqllogictest/test_files/predicates.slt
b/datafusion/sqllogictest/test_files/predicates.slt
index b5347f997a..ba407f6d28 100644
--- a/datafusion/sqllogictest/test_files/predicates.slt
+++ b/datafusion/sqllogictest/test_files/predicates.slt
@@ -220,6 +220,30 @@ SELECT * FROM test WHERE column1 !~* 'z'
foo
Barrr
+query T
+SELECT * FROM test WHERE column1 ~~ '__z%'
+----
+Bazzz
+
+query T
+SELECT * FROM test WHERE column1 ~~* '__z%'
+----
+Bazzz
+ZZZZZ
+
+query T
+SELECT * FROM test WHERE column1 !~~ '__z%'
+----
+foo
+Barrr
+ZZZZZ
+
+query T
+SELECT * FROM test WHERE column1 !~~* '__z%'
+----
+foo
+Barrr
+
statement ok
DROP TABLE test;
diff --git a/datafusion/substrait/src/logical_plan/producer.rs
b/datafusion/substrait/src/logical_plan/producer.rs
index ab0e8c8608..fc9517c90a 100644
--- a/datafusion/substrait/src/logical_plan/producer.rs
+++ b/datafusion/substrait/src/logical_plan/producer.rs
@@ -577,6 +577,10 @@ pub fn operator_to_name(op: Operator) -> &'static str {
Operator::RegexIMatch => "regex_imatch",
Operator::RegexNotMatch => "regex_not_match",
Operator::RegexNotIMatch => "regex_not_imatch",
+ Operator::LikeMatch => "like_match",
+ Operator::ILikeMatch => "like_imatch",
+ Operator::NotLikeMatch => "like_not_match",
+ Operator::NotILikeMatch => "like_not_imatch",
Operator::BitwiseAnd => "bitwise_and",
Operator::BitwiseOr => "bitwise_or",
Operator::StringConcat => "str_concat",
diff --git a/docs/source/user-guide/sql/operators.md
b/docs/source/user-guide/sql/operators.md
index 265e56bb2c..872ef55dd3 100644
--- a/docs/source/user-guide/sql/operators.md
+++ b/docs/source/user-guide/sql/operators.md
@@ -263,6 +263,58 @@ Not Regex Case-Insensitive Match
+---------------------------------------------------+
```
+### `~~`
+
+Like Match
+
+```sql
+❯ SELECT 'datafusion' ~~ 'dat_f%n';
++---------------------------------------+
+| Utf8("datafusion") ~~ Utf8("dat_f%n") |
++---------------------------------------+
+| true |
++---------------------------------------+
+```
+
+### `~~*`
+
+Case-Insensitive Like Match
+
+```sql
+❯ SELECT 'datafusion' ~~* 'Dat_F%n';
++----------------------------------------+
+| Utf8("datafusion") ~~* Utf8("Dat_F%n") |
++----------------------------------------+
+| true |
++----------------------------------------+
+```
+
+### `!~~`
+
+Not Like Match
+
+```sql
+❯ SELECT 'datafusion' !~~ 'Dat_F%n';
++----------------------------------------+
+| Utf8("datafusion") !~~ Utf8("Dat_F%n") |
++----------------------------------------+
+| true |
++----------------------------------------+
+```
+
+### `!~~*`
+
+Not Case-Insensitive Like Match
+
+```sql
+❯ SELECT 'datafusion' !~~* 'Dat%F_n';
++-----------------------------------------+
+| Utf8("datafusion") !~~* Utf8("Dat%F_n") |
++-----------------------------------------+
+| true |
++-----------------------------------------+
+```
+
## Logical Operators
- [AND](#and)