This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new bd38142de4 Add support for Postgres LIKE operators (#8894)
bd38142de4 is described below

commit bd38142de4da21fb7fefa1db997fa5c7eb95ffcc
Author: Marko Grujic <[email protected]>
AuthorDate: Fri Jan 26 12:02:15 2024 +0100

    Add support for Postgres LIKE operators (#8894)
    
    * Add support for PG LIKE operators
    
    * Bump sqlparser dep from branch to merge commit
---
 datafusion/expr/src/operator.rs                    |  24 +++++
 datafusion/expr/src/type_coercion/binary.rs        |   7 ++
 datafusion/physical-expr/src/expressions/binary.rs | 105 ++++++++++++++++++++-
 datafusion/sql/src/expr/binary_op.rs               |   4 +
 datafusion/sqllogictest/test_files/predicates.slt  |  24 +++++
 datafusion/substrait/src/logical_plan/producer.rs  |   4 +
 docs/source/user-guide/sql/operators.md            |  52 ++++++++++
 7 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr/src/operator.rs
index 57888a11d4..a10312e234 100644
--- a/datafusion/expr/src/operator.rs
+++ b/datafusion/expr/src/operator.rs
@@ -69,6 +69,14 @@ pub enum Operator {
     RegexNotMatch,
     /// Case insensitive regex not match
     RegexNotIMatch,
+    /// Case sensitive pattern match
+    LikeMatch,
+    /// Case insensitive pattern match
+    ILikeMatch,
+    /// Case sensitive pattern not match
+    NotLikeMatch,
+    /// Case insensitive pattern not match
+    NotILikeMatch,
     /// Bitwise and, like `&`
     BitwiseAnd,
     /// Bitwise or, like `|`
@@ -100,6 +108,10 @@ impl Operator {
             Operator::GtEq => Some(Operator::Lt),
             Operator::IsDistinctFrom => Some(Operator::IsNotDistinctFrom),
             Operator::IsNotDistinctFrom => Some(Operator::IsDistinctFrom),
+            Operator::LikeMatch => Some(Operator::NotLikeMatch),
+            Operator::ILikeMatch => Some(Operator::NotILikeMatch),
+            Operator::NotLikeMatch => Some(Operator::LikeMatch),
+            Operator::NotILikeMatch => Some(Operator::ILikeMatch),
             Operator::Plus
             | Operator::Minus
             | Operator::Multiply
@@ -192,6 +204,10 @@ impl Operator {
             | Operator::RegexIMatch
             | Operator::RegexNotMatch
             | Operator::RegexNotIMatch
+            | Operator::LikeMatch
+            | Operator::ILikeMatch
+            | Operator::NotLikeMatch
+            | Operator::NotILikeMatch
             | Operator::BitwiseAnd
             | Operator::BitwiseOr
             | Operator::BitwiseXor
@@ -221,6 +237,10 @@ impl Operator {
             | Operator::RegexNotMatch
             | Operator::RegexIMatch
             | Operator::RegexNotIMatch
+            | Operator::LikeMatch
+            | Operator::ILikeMatch
+            | Operator::NotLikeMatch
+            | Operator::NotILikeMatch
             | Operator::BitwiseAnd
             | Operator::BitwiseOr
             | Operator::BitwiseShiftLeft
@@ -253,6 +273,10 @@ impl fmt::Display for Operator {
             Operator::RegexIMatch => "~*",
             Operator::RegexNotMatch => "!~",
             Operator::RegexNotIMatch => "!~*",
+            Operator::LikeMatch => "~~",
+            Operator::ILikeMatch => "~~*",
+            Operator::NotLikeMatch => "!~~",
+            Operator::NotILikeMatch => "!~~*",
             Operator::IsDistinctFrom => "IS DISTINCT FROM",
             Operator::IsNotDistinctFrom => "IS NOT DISTINCT FROM",
             Operator::BitwiseAnd => "&",
diff --git a/datafusion/expr/src/type_coercion/binary.rs 
b/datafusion/expr/src/type_coercion/binary.rs
index 6bacc18700..70015c6992 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -101,6 +101,13 @@ fn signature(lhs: &DataType, op: &Operator, rhs: 
&DataType) -> Result<Signature>
                 )
             })
         }
+        LikeMatch | ILikeMatch | NotLikeMatch | NotILikeMatch => {
+            regex_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| {
+                plan_datafusion_err!(
+                    "Cannot infer common argument type for regex operation 
{lhs} {op} {rhs}"
+                )
+            })
+        }
         BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseShiftRight | 
BitwiseShiftLeft => {
             bitwise_coercion(lhs, rhs).map(Signature::uniform).ok_or_else(|| {
                 plan_datafusion_err!(
diff --git a/datafusion/physical-expr/src/expressions/binary.rs 
b/datafusion/physical-expr/src/expressions/binary.rs
index 8c4078dbce..3f13030092 100644
--- a/datafusion/physical-expr/src/expressions/binary.rs
+++ b/datafusion/physical-expr/src/expressions/binary.rs
@@ -28,12 +28,12 @@ use crate::sort_properties::SortProperties;
 use crate::PhysicalExpr;
 
 use arrow::array::*;
-use arrow::compute::cast;
 use arrow::compute::kernels::boolean::{and_kleene, not, or_kleene};
 use arrow::compute::kernels::cmp::*;
 use arrow::compute::kernels::comparison::regexp_is_match_utf8;
 use arrow::compute::kernels::comparison::regexp_is_match_utf8_scalar;
 use arrow::compute::kernels::concat_elements::concat_elements_utf8;
+use arrow::compute::{cast, ilike, like, nilike, nlike};
 use arrow::datatypes::*;
 use arrow::record_batch::RecordBatch;
 
@@ -281,6 +281,10 @@ impl PhysicalExpr for BinaryExpr {
             Operator::GtEq => return apply_cmp(&lhs, &rhs, gt_eq),
             Operator::IsDistinctFrom => return apply_cmp(&lhs, &rhs, distinct),
             Operator::IsNotDistinctFrom => return apply_cmp(&lhs, &rhs, 
not_distinct),
+            Operator::LikeMatch => return apply_cmp(&lhs, &rhs, like),
+            Operator::ILikeMatch => return apply_cmp(&lhs, &rhs, ilike),
+            Operator::NotLikeMatch => return apply_cmp(&lhs, &rhs, nlike),
+            Operator::NotILikeMatch => return apply_cmp(&lhs, &rhs, nilike),
             _ => {}
         }
 
@@ -554,7 +558,8 @@ impl BinaryExpr {
         use Operator::*;
         match &self.op {
             IsDistinctFrom | IsNotDistinctFrom | Lt | LtEq | Gt | GtEq | Eq | 
NotEq
-            | Plus | Minus | Multiply | Divide | Modulo => unreachable!(),
+            | Plus | Minus | Multiply | Divide | Modulo | LikeMatch | 
ILikeMatch
+            | NotLikeMatch | NotILikeMatch => unreachable!(),
             And => {
                 if left_data_type == &DataType::Boolean {
                     boolean_op!(&left, &right, and_kleene)
@@ -970,6 +975,102 @@ mod tests {
             DataType::Boolean,
             [false, false, false, false, true],
         );
+        test_coercion!(
+            StringArray,
+            DataType::Utf8,
+            vec!["abc"; 5],
+            StringArray,
+            DataType::Utf8,
+            vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+            Operator::LikeMatch,
+            BooleanArray,
+            DataType::Boolean,
+            [true, false, false, true, false],
+        );
+        test_coercion!(
+            StringArray,
+            DataType::Utf8,
+            vec!["abc"; 5],
+            StringArray,
+            DataType::Utf8,
+            vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+            Operator::ILikeMatch,
+            BooleanArray,
+            DataType::Boolean,
+            [true, true, false, true, true],
+        );
+        test_coercion!(
+            StringArray,
+            DataType::Utf8,
+            vec!["abc"; 5],
+            StringArray,
+            DataType::Utf8,
+            vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+            Operator::NotLikeMatch,
+            BooleanArray,
+            DataType::Boolean,
+            [false, true, true, false, true],
+        );
+        test_coercion!(
+            StringArray,
+            DataType::Utf8,
+            vec!["abc"; 5],
+            StringArray,
+            DataType::Utf8,
+            vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+            Operator::NotILikeMatch,
+            BooleanArray,
+            DataType::Boolean,
+            [false, false, true, false, false],
+        );
+        test_coercion!(
+            LargeStringArray,
+            DataType::LargeUtf8,
+            vec!["abc"; 5],
+            LargeStringArray,
+            DataType::LargeUtf8,
+            vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+            Operator::LikeMatch,
+            BooleanArray,
+            DataType::Boolean,
+            [true, false, false, true, false],
+        );
+        test_coercion!(
+            LargeStringArray,
+            DataType::LargeUtf8,
+            vec!["abc"; 5],
+            LargeStringArray,
+            DataType::LargeUtf8,
+            vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+            Operator::ILikeMatch,
+            BooleanArray,
+            DataType::Boolean,
+            [true, true, false, true, true],
+        );
+        test_coercion!(
+            LargeStringArray,
+            DataType::LargeUtf8,
+            vec!["abc"; 5],
+            LargeStringArray,
+            DataType::LargeUtf8,
+            vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+            Operator::NotLikeMatch,
+            BooleanArray,
+            DataType::Boolean,
+            [false, true, true, false, true],
+        );
+        test_coercion!(
+            LargeStringArray,
+            DataType::LargeUtf8,
+            vec!["abc"; 5],
+            LargeStringArray,
+            DataType::LargeUtf8,
+            vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
+            Operator::NotILikeMatch,
+            BooleanArray,
+            DataType::Boolean,
+            [false, false, true, false, false],
+        );
         test_coercion!(
             Int16Array,
             DataType::Int16,
diff --git a/datafusion/sql/src/expr/binary_op.rs 
b/datafusion/sql/src/expr/binary_op.rs
index d9c85663e5..78efaca099 100644
--- a/datafusion/sql/src/expr/binary_op.rs
+++ b/datafusion/sql/src/expr/binary_op.rs
@@ -40,6 +40,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             BinaryOperator::PGRegexIMatch => Ok(Operator::RegexIMatch),
             BinaryOperator::PGRegexNotMatch => Ok(Operator::RegexNotMatch),
             BinaryOperator::PGRegexNotIMatch => Ok(Operator::RegexNotIMatch),
+            BinaryOperator::PGLikeMatch => Ok(Operator::LikeMatch),
+            BinaryOperator::PGILikeMatch => Ok(Operator::ILikeMatch),
+            BinaryOperator::PGNotLikeMatch => Ok(Operator::NotLikeMatch),
+            BinaryOperator::PGNotILikeMatch => Ok(Operator::NotILikeMatch),
             BinaryOperator::BitwiseAnd => Ok(Operator::BitwiseAnd),
             BinaryOperator::BitwiseOr => Ok(Operator::BitwiseOr),
             BinaryOperator::BitwiseXor => Ok(Operator::BitwiseXor),
diff --git a/datafusion/sqllogictest/test_files/predicates.slt 
b/datafusion/sqllogictest/test_files/predicates.slt
index b5347f997a..ba407f6d28 100644
--- a/datafusion/sqllogictest/test_files/predicates.slt
+++ b/datafusion/sqllogictest/test_files/predicates.slt
@@ -220,6 +220,30 @@ SELECT * FROM test WHERE column1 !~* 'z'
 foo
 Barrr
 
+query T
+SELECT * FROM test WHERE column1 ~~ '__z%'
+----
+Bazzz
+
+query T
+SELECT * FROM test WHERE column1 ~~* '__z%'
+----
+Bazzz
+ZZZZZ
+
+query T
+SELECT * FROM test WHERE column1 !~~ '__z%'
+----
+foo
+Barrr
+ZZZZZ
+
+query T
+SELECT * FROM test WHERE column1 !~~* '__z%'
+----
+foo
+Barrr
+
 statement ok
 DROP TABLE test;
 
diff --git a/datafusion/substrait/src/logical_plan/producer.rs 
b/datafusion/substrait/src/logical_plan/producer.rs
index ab0e8c8608..fc9517c90a 100644
--- a/datafusion/substrait/src/logical_plan/producer.rs
+++ b/datafusion/substrait/src/logical_plan/producer.rs
@@ -577,6 +577,10 @@ pub fn operator_to_name(op: Operator) -> &'static str {
         Operator::RegexIMatch => "regex_imatch",
         Operator::RegexNotMatch => "regex_not_match",
         Operator::RegexNotIMatch => "regex_not_imatch",
+        Operator::LikeMatch => "like_match",
+        Operator::ILikeMatch => "like_imatch",
+        Operator::NotLikeMatch => "like_not_match",
+        Operator::NotILikeMatch => "like_not_imatch",
         Operator::BitwiseAnd => "bitwise_and",
         Operator::BitwiseOr => "bitwise_or",
         Operator::StringConcat => "str_concat",
diff --git a/docs/source/user-guide/sql/operators.md 
b/docs/source/user-guide/sql/operators.md
index 265e56bb2c..872ef55dd3 100644
--- a/docs/source/user-guide/sql/operators.md
+++ b/docs/source/user-guide/sql/operators.md
@@ -263,6 +263,58 @@ Not Regex Case-Insensitive Match
 +---------------------------------------------------+
 ```
 
+### `~~`
+
+Like Match
+
+```sql
+❯ SELECT 'datafusion' ~~ 'dat_f%n';
++---------------------------------------+
+| Utf8("datafusion") ~~ Utf8("dat_f%n") |
++---------------------------------------+
+| true                                  |
++---------------------------------------+
+```
+
+### `~~*`
+
+Case-Insensitive Like Match
+
+```sql
+❯ SELECT 'datafusion' ~~* 'Dat_F%n';
++----------------------------------------+
+| Utf8("datafusion") ~~* Utf8("Dat_F%n") |
++----------------------------------------+
+| true                                   |
++----------------------------------------+
+```
+
+### `!~~`
+
+Not Like Match
+
+```sql
+❯ SELECT 'datafusion' !~~ 'Dat_F%n';
++----------------------------------------+
+| Utf8("datafusion") !~~ Utf8("Dat_F%n") |
++----------------------------------------+
+| true                                   |
++----------------------------------------+
+```
+
+### `!~~*`
+
+Not Case-Insensitive Like Match
+
+```sql
+❯ SELECT 'datafusion' !~~* 'Dat%F_n';
++-----------------------------------------+
+| Utf8("datafusion") !~~* Utf8("Dat%F_n") |
++-----------------------------------------+
+| true                                    |
++-----------------------------------------+
+```
+
 ## Logical Operators
 
 - [AND](#and)

Reply via email to