This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 5ad621fd97 Fix LIKE with escapes (#6703)
5ad621fd97 is described below

commit 5ad621fd97032796f3da2d02948e65513de8c891
Author: Piotr Findeisen <[email protected]>
AuthorDate: Sat Nov 9 08:43:46 2024 +0100

    Fix LIKE with escapes (#6703)
    
    * Fix LIKE with escapes
    
    Fix LIKE processing for patterns containing escapes
    
    - the starts_with / ends_with optimization did not correctly account for
      escapes when checking whether the rest of the pattern is a literal
    - the pattern to regexp compiler incorrectly processed \ followed by a
      character other than % or _. In PostgreSQL '\x' pattern matches single
      'x'.
    
    There are two tests
    
    - like_escape_many was generated using PostgreSQL with the code attached
      below for verification
    - like_escape consists of hand-picked test cases that are more interesting.
      The lower cardinality of the hand-picked test cases allows for exercising
      all scalar/array vs scalar/array combinations.
    
    The script below isn't the simplest possible, because it was also used in
    an attempt to generate more test cases by adding padding. Hence e.g.
    is_like_without_dangling_escape. Since the script is attached for
    reference only, it is attached as-is.
    
    ```python
    import psycopg2
    
    data = r"""
    \
    \\
    \\\
    \\\\
    a
    \a
    \\a
    %
    \%
    \\%
    %%
    \%%
    \\%%
    _
    \_
    \\_
    __
    \__
    \\__
    abc
    a_c
    a\bc
    a\_c
    %abc
    \%abc
    a\\_c%
    """.split('\n')
    
    data = list(dict.fromkeys(data))
    
    conn = psycopg2.connect(host='localhost', port=5432, user='postgres', 
password='mysecretpassword')
    conn.set_session(autocommit=True)
    cursor = conn.cursor()
    for r in data:
        try:
            # PostgreSQL verifies dandling escape only sometimes
            cursor.execute(f"SELECT %s LIKE %s", (r, r))
            is_like, = cursor.fetchone()
            has_dandling_escape = False
            pg_pattern = r
        except Exception as e:
            if 'LIKE pattern must not end with escape character' not in str(e):
                raise e
            has_dandling_escape = True
            pg_pattern = r + '\\'
    
        for l in data:
            # print()
            # print('     '.join(str(v) for v in (l, r, has_dandling_escape, 
postgres_pattern)))
            cursor.execute(f"SELECT %s LIKE %s", (l, pg_pattern))
            is_like, = cursor.fetchone()
            assert type(is_like) is bool
    
            if not is_like and has_dandling_escape:
                pattern_without_escaped_dandling_escape = pg_pattern[:-2]
                cursor.execute(f"SELECT %s LIKE %s", (l, 
pattern_without_escaped_dandling_escape))
                is_like_without_dangling_escape, = cursor.fetchone()
                assert type(is_like_without_dangling_escape) is bool
            else:
                is_like_without_dangling_escape = False
            assert '"' not in l
            assert '"' not in r
            print('(r"%s", r"%s", %s),' % (
                l, r,
                str(is_like).lower(),
                # str(has_dandling_escape).lower(),
                # str(is_like_without_dangling_escape).lower(),
            ))
    ```
    
    * Compact tests for regex_like
    
    Reduce test code boilerplate and make it easier to see what are the test
    cases.
    
    * Add more test cases for regex_like
---
 arrow-string/src/like.rs      | 1058 +++++++++++++++++++++++++++++++++++++++++
 arrow-string/src/predicate.rs |   95 ++--
 2 files changed, 1094 insertions(+), 59 deletions(-)

diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs
index 1b04b4eb56..0a5aa77dbb 100644
--- a/arrow-string/src/like.rs
+++ b/arrow-string/src/like.rs
@@ -429,6 +429,7 @@ legacy_kernels!(
 mod tests {
     use super::*;
     use arrow_array::types::Int8Type;
+    use std::iter::zip;
 
     /// Applying `op(left, right)`, both sides are arrays
     /// The macro tests four types of array implementations:
@@ -1864,4 +1865,1061 @@ mod tests {
             assert!(r.is_null(0));
         }
     }
+
+    #[test]
+    fn like_escape() {
+        // (value, pattern, expected)
+        let test_cases = vec![
+            // Empty pattern
+            (r"", r"", true),
+            (r"\", r"", false),
+            // Sole (dangling) escape (some engines consider this invalid 
pattern)
+            (r"", r"\", false),
+            (r"\", r"\", true),
+            (r"\\", r"\", false),
+            (r"a", r"\", false),
+            (r"\a", r"\", false),
+            (r"\\a", r"\", false),
+            // Sole escape
+            (r"", r"\\", false),
+            (r"\", r"\\", true),
+            (r"\\", r"\\", false),
+            (r"a", r"\\", false),
+            (r"\a", r"\\", false),
+            (r"\\a", r"\\", false),
+            // Sole escape and dangling escape
+            (r"", r"\\\", false),
+            (r"\", r"\\\", false),
+            (r"\\", r"\\\", true),
+            (r"\\\", r"\\\", false),
+            (r"\\\\", r"\\\", false),
+            (r"a", r"\\\", false),
+            (r"\a", r"\\\", false),
+            (r"\\a", r"\\\", false),
+            // Sole two escapes
+            (r"", r"\\\\", false),
+            (r"\", r"\\\\", false),
+            (r"\\", r"\\\\", true),
+            (r"\\\", r"\\\\", false),
+            (r"\\\\", r"\\\\", false),
+            (r"\\\\\", r"\\\\", false),
+            (r"a", r"\\\\", false),
+            (r"\a", r"\\\\", false),
+            (r"\\a", r"\\\\", false),
+            // Escaped non-wildcard
+            (r"", r"\a", false),
+            (r"\", r"\a", false),
+            (r"\\", r"\a", false),
+            (r"a", r"\a", true),
+            (r"\a", r"\a", false),
+            (r"\\a", r"\a", false),
+            // Escaped _ wildcard
+            (r"", r"\_", false),
+            (r"\", r"\_", false),
+            (r"\\", r"\_", false),
+            (r"a", r"\_", false),
+            (r"_", r"\_", true),
+            (r"%", r"\_", false),
+            (r"\a", r"\_", false),
+            (r"\\a", r"\_", false),
+            (r"\_", r"\_", false),
+            (r"\\_", r"\_", false),
+            // Escaped % wildcard
+            (r"", r"\%", false),
+            (r"\", r"\%", false),
+            (r"\\", r"\%", false),
+            (r"a", r"\%", false),
+            (r"_", r"\%", false),
+            (r"%", r"\%", true),
+            (r"\a", r"\%", false),
+            (r"\\a", r"\%", false),
+            (r"\%", r"\%", false),
+            (r"\\%", r"\%", false),
+            // Escape and non-wildcard
+            (r"", r"\\a", false),
+            (r"\", r"\\a", false),
+            (r"\\", r"\\a", false),
+            (r"a", r"\\a", false),
+            (r"\a", r"\\a", true),
+            (r"\\a", r"\\a", false),
+            (r"\\\a", r"\\a", false),
+            // Escape and _ wildcard
+            (r"", r"\\_", false),
+            (r"\", r"\\_", false),
+            (r"\\", r"\\_", true),
+            (r"a", r"\\_", false),
+            (r"_", r"\\_", false),
+            (r"%", r"\\_", false),
+            (r"\a", r"\\_", true),
+            (r"\\a", r"\\_", false),
+            (r"\_", r"\\_", true),
+            (r"\\_", r"\\_", false),
+            (r"\\\_", r"\\_", false),
+            // Escape and % wildcard
+            (r"", r"\\%", false),
+            (r"\", r"\\%", true),
+            (r"\\", r"\\%", true),
+            (r"a", r"\\%", false),
+            (r"ab", r"\\%", false),
+            (r"a%", r"\\%", false),
+            (r"_", r"\\%", false),
+            (r"%", r"\\%", false),
+            (r"\a", r"\\%", true),
+            (r"\\a", r"\\%", true),
+            (r"\%", r"\\%", true),
+            (r"\\%", r"\\%", true),
+            (r"\\\%", r"\\%", true),
+            // %... pattern with dangling escape
+            (r"\", r"%\", true),
+            (r"\\", r"%\", true),
+            (r"%\", r"%\", true),
+            (r"%\\", r"%\", true),
+            (r"abc\", r"%\", true),
+            (r"abc", r"%\", false),
+            // %... pattern with escape
+            (r"\", r"%\\", true),
+            (r"\\", r"%\\", true),
+            (r"%\\", r"%\\", true),
+            (r"%\\\", r"%\\", true),
+            (r"abc\", r"%\\", true),
+            (r"abc", r"%\\", false),
+            // %... pattern including escaped non-wildcard
+            (r"ac", r"%a\c", true),
+            (r"xyzac", r"%a\c", true),
+            (r"abc", r"%a\c", false),
+            (r"a\c", r"%a\c", false),
+            (r"%a\c", r"%a\c", false),
+            // %... pattern including escape
+            (r"\", r"%a\\c", false),
+            (r"\\", r"%a\\c", false),
+            (r"ac", r"%a\\c", false),
+            (r"a\c", r"%a\\c", true),
+            (r"a\\c", r"%a\\c", false),
+            (r"abc", r"%a\\c", false),
+            (r"xyza\c", r"%a\\c", true),
+            (r"xyza\\c", r"%a\\c", false),
+            (r"%a\\c", r"%a\\c", false),
+            // ...% pattern with wildcard
+            (r"\", r"\\%", true),
+            (r"\\", r"\\%", true),
+            (r"\\%", r"\\%", true),
+            (r"\\\%", r"\\%", true),
+            (r"\abc", r"\\%", true),
+            (r"a", r"\\%", false),
+            (r"abc", r"\\%", false),
+            // ...% pattern including escaped non-wildcard
+            (r"ac", r"a\c%", true),
+            (r"acxyz", r"a\c%", true),
+            (r"abc", r"a\c%", false),
+            (r"a\c", r"a\c%", false),
+            (r"a\c%", r"a\c%", false),
+            (r"a\\c%", r"a\c%", false),
+            // ...% pattern including escape
+            (r"ac", r"a\\c%", false),
+            (r"a\c", r"a\\c%", true),
+            (r"a\cxyz", r"a\\c%", true),
+            (r"a\\c", r"a\\c%", false),
+            (r"a\\cxyz", r"a\\c%", false),
+            (r"abc", r"a\\c%", false),
+            (r"abcxyz", r"a\\c%", false),
+            (r"a\\c%", r"a\\c%", false),
+            // %...% pattern including escaped non-wildcard
+            (r"ac", r"%a\c%", true),
+            (r"xyzacxyz", r"%a\c%", true),
+            (r"abc", r"%a\c%", false),
+            (r"a\c", r"%a\c%", false),
+            (r"xyza\cxyz", r"%a\c%", false),
+            (r"%a\c%", r"%a\c%", false),
+            (r"%a\\c%", r"%a\c%", false),
+            // %...% pattern including escape
+            (r"ac", r"%a\\c%", false),
+            (r"a\c", r"%a\\c%", true),
+            (r"xyza\cxyz", r"%a\\c%", true),
+            (r"a\\c", r"%a\\c%", false),
+            (r"xyza\\cxyz", r"%a\\c%", false),
+            (r"abc", r"%a\\c%", false),
+            (r"xyzabcxyz", r"%a\\c%", false),
+            (r"%a\\c%", r"%a\\c%", false),
+            // Odd (7) backslashes and % wildcard
+            (r"\\%", r"\\\\\\\%", false),
+            (r"\\\", r"\\\\\\\%", false),
+            (r"\\\%", r"\\\\\\\%", true),
+            (r"\\\\", r"\\\\\\\%", false),
+            (r"\\\\%", r"\\\\\\\%", false),
+            (r"\\\\\\\%", r"\\\\\\\%", false),
+            // Odd (7) backslashes and _ wildcard
+            (r"\\\", r"\\\\\\\_", false),
+            (r"\\\\", r"\\\\\\\_", false),
+            (r"\\\_", r"\\\\\\\_", true),
+            (r"\\\\", r"\\\\\\\_", false),
+            (r"\\\a", r"\\\\\\\_", false),
+            (r"\\\\_", r"\\\\\\\_", false),
+            (r"\\\\\\\_", r"\\\\\\\_", false),
+            // Even (8) backslashes and % wildcard
+            (r"\\\", r"\\\\\\\\%", false),
+            (r"\\\\", r"\\\\\\\\%", true),
+            (r"\\\\\", r"\\\\\\\\%", true),
+            (r"\\\\xyz", r"\\\\\\\\%", true),
+            (r"\\\\\\\\%", r"\\\\\\\\%", true),
+            // Even (8) backslashes and _ wildcard
+            (r"\\\", r"\\\\\\\\_", false),
+            (r"\\\\", r"\\\\\\\\_", false),
+            (r"\\\\\", r"\\\\\\\\_", true),
+            (r"\\\\a", r"\\\\\\\\_", true),
+            (r"\\\\\a", r"\\\\\\\\_", false),
+            (r"\\\\ab", r"\\\\\\\\_", false),
+            (r"\\\\\\\\_", r"\\\\\\\\_", false),
+        ];
+
+        for (value, pattern, expected) in test_cases {
+            let unexpected = BooleanArray::from(vec![!expected]);
+            let expected = BooleanArray::from(vec![expected]);
+
+            for string_type in [DataType::Utf8, DataType::LargeUtf8, 
DataType::Utf8View] {
+                for ((value_datum, value_type), (pattern_datum, pattern_type)) 
in zip(
+                    make_datums(value, &string_type),
+                    make_datums(pattern, &string_type),
+                ) {
+                    let value_datum = value_datum.as_ref();
+                    let pattern_datum = pattern_datum.as_ref();
+                    assert_eq!(
+                        like(value_datum, pattern_datum).unwrap(),
+                        expected,
+                        "{value_type:?} «{value}» like {pattern_type:?} 
«{pattern}»"
+                    );
+                    assert_eq!(
+                        ilike(value_datum, pattern_datum).unwrap(),
+                        expected,
+                        "{value_type:?} «{value}» ilike {pattern_type:?} 
«{pattern}»"
+                    );
+                    assert_eq!(
+                        nlike(value_datum, pattern_datum).unwrap(),
+                        unexpected,
+                        "{value_type:?} «{value}» nlike {pattern_type:?} 
«{pattern}»"
+                    );
+                    assert_eq!(
+                        nilike(value_datum, pattern_datum).unwrap(),
+                        unexpected,
+                        "{value_type:?} «{value}» nilike {pattern_type:?} 
«{pattern}»"
+                    );
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn like_escape_many() {
+        // (value, pattern, expected)
+        let test_cases = vec![
+            (r"", r"", true),
+            (r"\", r"", false),
+            (r"\\", r"", false),
+            (r"\\\", r"", false),
+            (r"\\\\", r"", false),
+            (r"a", r"", false),
+            (r"\a", r"", false),
+            (r"\\a", r"", false),
+            (r"%", r"", false),
+            (r"\%", r"", false),
+            (r"\\%", r"", false),
+            (r"%%", r"", false),
+            (r"\%%", r"", false),
+            (r"\\%%", r"", false),
+            (r"_", r"", false),
+            (r"\_", r"", false),
+            (r"\\_", r"", false),
+            (r"__", r"", false),
+            (r"\__", r"", false),
+            (r"\\__", r"", false),
+            (r"abc", r"", false),
+            (r"a_c", r"", false),
+            (r"a\bc", r"", false),
+            (r"a\_c", r"", false),
+            (r"%abc", r"", false),
+            (r"\%abc", r"", false),
+            (r"a\\_c%", r"", false),
+            (r"", r"\", false),
+            (r"\", r"\", true),
+            (r"\\", r"\", false),
+            (r"\\\", r"\", false),
+            (r"\\\\", r"\", false),
+            (r"a", r"\", false),
+            (r"\a", r"\", false),
+            (r"\\a", r"\", false),
+            (r"%", r"\", false),
+            (r"\%", r"\", false),
+            (r"\\%", r"\", false),
+            (r"%%", r"\", false),
+            (r"\%%", r"\", false),
+            (r"\\%%", r"\", false),
+            (r"_", r"\", false),
+            (r"\_", r"\", false),
+            (r"\\_", r"\", false),
+            (r"__", r"\", false),
+            (r"\__", r"\", false),
+            (r"\\__", r"\", false),
+            (r"abc", r"\", false),
+            (r"a_c", r"\", false),
+            (r"a\bc", r"\", false),
+            (r"a\_c", r"\", false),
+            (r"%abc", r"\", false),
+            (r"\%abc", r"\", false),
+            (r"a\\_c%", r"\", false),
+            (r"", r"\\", false),
+            (r"\", r"\\", true),
+            (r"\\", r"\\", false),
+            (r"\\\", r"\\", false),
+            (r"\\\\", r"\\", false),
+            (r"a", r"\\", false),
+            (r"\a", r"\\", false),
+            (r"\\a", r"\\", false),
+            (r"%", r"\\", false),
+            (r"\%", r"\\", false),
+            (r"\\%", r"\\", false),
+            (r"%%", r"\\", false),
+            (r"\%%", r"\\", false),
+            (r"\\%%", r"\\", false),
+            (r"_", r"\\", false),
+            (r"\_", r"\\", false),
+            (r"\\_", r"\\", false),
+            (r"__", r"\\", false),
+            (r"\__", r"\\", false),
+            (r"\\__", r"\\", false),
+            (r"abc", r"\\", false),
+            (r"a_c", r"\\", false),
+            (r"a\bc", r"\\", false),
+            (r"a\_c", r"\\", false),
+            (r"%abc", r"\\", false),
+            (r"\%abc", r"\\", false),
+            (r"a\\_c%", r"\\", false),
+            (r"", r"\\\", false),
+            (r"\", r"\\\", false),
+            (r"\\", r"\\\", true),
+            (r"\\\", r"\\\", false),
+            (r"\\\\", r"\\\", false),
+            (r"a", r"\\\", false),
+            (r"\a", r"\\\", false),
+            (r"\\a", r"\\\", false),
+            (r"%", r"\\\", false),
+            (r"\%", r"\\\", false),
+            (r"\\%", r"\\\", false),
+            (r"%%", r"\\\", false),
+            (r"\%%", r"\\\", false),
+            (r"\\%%", r"\\\", false),
+            (r"_", r"\\\", false),
+            (r"\_", r"\\\", false),
+            (r"\\_", r"\\\", false),
+            (r"__", r"\\\", false),
+            (r"\__", r"\\\", false),
+            (r"\\__", r"\\\", false),
+            (r"abc", r"\\\", false),
+            (r"a_c", r"\\\", false),
+            (r"a\bc", r"\\\", false),
+            (r"a\_c", r"\\\", false),
+            (r"%abc", r"\\\", false),
+            (r"\%abc", r"\\\", false),
+            (r"a\\_c%", r"\\\", false),
+            (r"", r"\\\\", false),
+            (r"\", r"\\\\", false),
+            (r"\\", r"\\\\", true),
+            (r"\\\", r"\\\\", false),
+            (r"\\\\", r"\\\\", false),
+            (r"a", r"\\\\", false),
+            (r"\a", r"\\\\", false),
+            (r"\\a", r"\\\\", false),
+            (r"%", r"\\\\", false),
+            (r"\%", r"\\\\", false),
+            (r"\\%", r"\\\\", false),
+            (r"%%", r"\\\\", false),
+            (r"\%%", r"\\\\", false),
+            (r"\\%%", r"\\\\", false),
+            (r"_", r"\\\\", false),
+            (r"\_", r"\\\\", false),
+            (r"\\_", r"\\\\", false),
+            (r"__", r"\\\\", false),
+            (r"\__", r"\\\\", false),
+            (r"\\__", r"\\\\", false),
+            (r"abc", r"\\\\", false),
+            (r"a_c", r"\\\\", false),
+            (r"a\bc", r"\\\\", false),
+            (r"a\_c", r"\\\\", false),
+            (r"%abc", r"\\\\", false),
+            (r"\%abc", r"\\\\", false),
+            (r"a\\_c%", r"\\\\", false),
+            (r"", r"a", false),
+            (r"\", r"a", false),
+            (r"\\", r"a", false),
+            (r"\\\", r"a", false),
+            (r"\\\\", r"a", false),
+            (r"a", r"a", true),
+            (r"\a", r"a", false),
+            (r"\\a", r"a", false),
+            (r"%", r"a", false),
+            (r"\%", r"a", false),
+            (r"\\%", r"a", false),
+            (r"%%", r"a", false),
+            (r"\%%", r"a", false),
+            (r"\\%%", r"a", false),
+            (r"_", r"a", false),
+            (r"\_", r"a", false),
+            (r"\\_", r"a", false),
+            (r"__", r"a", false),
+            (r"\__", r"a", false),
+            (r"\\__", r"a", false),
+            (r"abc", r"a", false),
+            (r"a_c", r"a", false),
+            (r"a\bc", r"a", false),
+            (r"a\_c", r"a", false),
+            (r"%abc", r"a", false),
+            (r"\%abc", r"a", false),
+            (r"a\\_c%", r"a", false),
+            (r"", r"\a", false),
+            (r"\", r"\a", false),
+            (r"\\", r"\a", false),
+            (r"\\\", r"\a", false),
+            (r"\\\\", r"\a", false),
+            (r"a", r"\a", true),
+            (r"\a", r"\a", false),
+            (r"\\a", r"\a", false),
+            (r"%", r"\a", false),
+            (r"\%", r"\a", false),
+            (r"\\%", r"\a", false),
+            (r"%%", r"\a", false),
+            (r"\%%", r"\a", false),
+            (r"\\%%", r"\a", false),
+            (r"_", r"\a", false),
+            (r"\_", r"\a", false),
+            (r"\\_", r"\a", false),
+            (r"__", r"\a", false),
+            (r"\__", r"\a", false),
+            (r"\\__", r"\a", false),
+            (r"abc", r"\a", false),
+            (r"a_c", r"\a", false),
+            (r"a\bc", r"\a", false),
+            (r"a\_c", r"\a", false),
+            (r"%abc", r"\a", false),
+            (r"\%abc", r"\a", false),
+            (r"a\\_c%", r"\a", false),
+            (r"", r"\\a", false),
+            (r"\", r"\\a", false),
+            (r"\\", r"\\a", false),
+            (r"\\\", r"\\a", false),
+            (r"\\\\", r"\\a", false),
+            (r"a", r"\\a", false),
+            (r"\a", r"\\a", true),
+            (r"\\a", r"\\a", false),
+            (r"%", r"\\a", false),
+            (r"\%", r"\\a", false),
+            (r"\\%", r"\\a", false),
+            (r"%%", r"\\a", false),
+            (r"\%%", r"\\a", false),
+            (r"\\%%", r"\\a", false),
+            (r"_", r"\\a", false),
+            (r"\_", r"\\a", false),
+            (r"\\_", r"\\a", false),
+            (r"__", r"\\a", false),
+            (r"\__", r"\\a", false),
+            (r"\\__", r"\\a", false),
+            (r"abc", r"\\a", false),
+            (r"a_c", r"\\a", false),
+            (r"a\bc", r"\\a", false),
+            (r"a\_c", r"\\a", false),
+            (r"%abc", r"\\a", false),
+            (r"\%abc", r"\\a", false),
+            (r"a\\_c%", r"\\a", false),
+            (r"", r"%", true),
+            (r"\", r"%", true),
+            (r"\\", r"%", true),
+            (r"\\\", r"%", true),
+            (r"\\\\", r"%", true),
+            (r"a", r"%", true),
+            (r"\a", r"%", true),
+            (r"\\a", r"%", true),
+            (r"%", r"%", true),
+            (r"\%", r"%", true),
+            (r"\\%", r"%", true),
+            (r"%%", r"%", true),
+            (r"\%%", r"%", true),
+            (r"\\%%", r"%", true),
+            (r"_", r"%", true),
+            (r"\_", r"%", true),
+            (r"\\_", r"%", true),
+            (r"__", r"%", true),
+            (r"\__", r"%", true),
+            (r"\\__", r"%", true),
+            (r"abc", r"%", true),
+            (r"a_c", r"%", true),
+            (r"a\bc", r"%", true),
+            (r"a\_c", r"%", true),
+            (r"%abc", r"%", true),
+            (r"\%abc", r"%", true),
+            (r"a\\_c%", r"%", true),
+            (r"", r"\%", false),
+            (r"\", r"\%", false),
+            (r"\\", r"\%", false),
+            (r"\\\", r"\%", false),
+            (r"\\\\", r"\%", false),
+            (r"a", r"\%", false),
+            (r"\a", r"\%", false),
+            (r"\\a", r"\%", false),
+            (r"%", r"\%", true),
+            (r"\%", r"\%", false),
+            (r"\\%", r"\%", false),
+            (r"%%", r"\%", false),
+            (r"\%%", r"\%", false),
+            (r"\\%%", r"\%", false),
+            (r"_", r"\%", false),
+            (r"\_", r"\%", false),
+            (r"\\_", r"\%", false),
+            (r"__", r"\%", false),
+            (r"\__", r"\%", false),
+            (r"\\__", r"\%", false),
+            (r"abc", r"\%", false),
+            (r"a_c", r"\%", false),
+            (r"a\bc", r"\%", false),
+            (r"a\_c", r"\%", false),
+            (r"%abc", r"\%", false),
+            (r"\%abc", r"\%", false),
+            (r"a\\_c%", r"\%", false),
+            (r"", r"\\%", false),
+            (r"\", r"\\%", true),
+            (r"\\", r"\\%", true),
+            (r"\\\", r"\\%", true),
+            (r"\\\\", r"\\%", true),
+            (r"a", r"\\%", false),
+            (r"\a", r"\\%", true),
+            (r"\\a", r"\\%", true),
+            (r"%", r"\\%", false),
+            (r"\%", r"\\%", true),
+            (r"\\%", r"\\%", true),
+            (r"%%", r"\\%", false),
+            (r"\%%", r"\\%", true),
+            (r"\\%%", r"\\%", true),
+            (r"_", r"\\%", false),
+            (r"\_", r"\\%", true),
+            (r"\\_", r"\\%", true),
+            (r"__", r"\\%", false),
+            (r"\__", r"\\%", true),
+            (r"\\__", r"\\%", true),
+            (r"abc", r"\\%", false),
+            (r"a_c", r"\\%", false),
+            (r"a\bc", r"\\%", false),
+            (r"a\_c", r"\\%", false),
+            (r"%abc", r"\\%", false),
+            (r"\%abc", r"\\%", true),
+            (r"a\\_c%", r"\\%", false),
+            (r"", r"%%", true),
+            (r"\", r"%%", true),
+            (r"\\", r"%%", true),
+            (r"\\\", r"%%", true),
+            (r"\\\\", r"%%", true),
+            (r"a", r"%%", true),
+            (r"\a", r"%%", true),
+            (r"\\a", r"%%", true),
+            (r"%", r"%%", true),
+            (r"\%", r"%%", true),
+            (r"\\%", r"%%", true),
+            (r"%%", r"%%", true),
+            (r"\%%", r"%%", true),
+            (r"\\%%", r"%%", true),
+            (r"_", r"%%", true),
+            (r"\_", r"%%", true),
+            (r"\\_", r"%%", true),
+            (r"__", r"%%", true),
+            (r"\__", r"%%", true),
+            (r"\\__", r"%%", true),
+            (r"abc", r"%%", true),
+            (r"a_c", r"%%", true),
+            (r"a\bc", r"%%", true),
+            (r"a\_c", r"%%", true),
+            (r"%abc", r"%%", true),
+            (r"\%abc", r"%%", true),
+            (r"a\\_c%", r"%%", true),
+            (r"", r"\%%", false),
+            (r"\", r"\%%", false),
+            (r"\\", r"\%%", false),
+            (r"\\\", r"\%%", false),
+            (r"\\\\", r"\%%", false),
+            (r"a", r"\%%", false),
+            (r"\a", r"\%%", false),
+            (r"\\a", r"\%%", false),
+            (r"%", r"\%%", true),
+            (r"\%", r"\%%", false),
+            (r"\\%", r"\%%", false),
+            (r"%%", r"\%%", true),
+            (r"\%%", r"\%%", false),
+            (r"\\%%", r"\%%", false),
+            (r"_", r"\%%", false),
+            (r"\_", r"\%%", false),
+            (r"\\_", r"\%%", false),
+            (r"__", r"\%%", false),
+            (r"\__", r"\%%", false),
+            (r"\\__", r"\%%", false),
+            (r"abc", r"\%%", false),
+            (r"a_c", r"\%%", false),
+            (r"a\bc", r"\%%", false),
+            (r"a\_c", r"\%%", false),
+            (r"%abc", r"\%%", true),
+            (r"\%abc", r"\%%", false),
+            (r"a\\_c%", r"\%%", false),
+            (r"", r"\\%%", false),
+            (r"\", r"\\%%", true),
+            (r"\\", r"\\%%", true),
+            (r"\\\", r"\\%%", true),
+            (r"\\\\", r"\\%%", true),
+            (r"a", r"\\%%", false),
+            (r"\a", r"\\%%", true),
+            (r"\\a", r"\\%%", true),
+            (r"%", r"\\%%", false),
+            (r"\%", r"\\%%", true),
+            (r"\\%", r"\\%%", true),
+            (r"%%", r"\\%%", false),
+            (r"\%%", r"\\%%", true),
+            (r"\\%%", r"\\%%", true),
+            (r"_", r"\\%%", false),
+            (r"\_", r"\\%%", true),
+            (r"\\_", r"\\%%", true),
+            (r"__", r"\\%%", false),
+            (r"\__", r"\\%%", true),
+            (r"\\__", r"\\%%", true),
+            (r"abc", r"\\%%", false),
+            (r"a_c", r"\\%%", false),
+            (r"a\bc", r"\\%%", false),
+            (r"a\_c", r"\\%%", false),
+            (r"%abc", r"\\%%", false),
+            (r"\%abc", r"\\%%", true),
+            (r"a\\_c%", r"\\%%", false),
+            (r"", r"_", false),
+            (r"\", r"_", true),
+            (r"\\", r"_", false),
+            (r"\\\", r"_", false),
+            (r"\\\\", r"_", false),
+            (r"a", r"_", true),
+            (r"\a", r"_", false),
+            (r"\\a", r"_", false),
+            (r"%", r"_", true),
+            (r"\%", r"_", false),
+            (r"\\%", r"_", false),
+            (r"%%", r"_", false),
+            (r"\%%", r"_", false),
+            (r"\\%%", r"_", false),
+            (r"_", r"_", true),
+            (r"\_", r"_", false),
+            (r"\\_", r"_", false),
+            (r"__", r"_", false),
+            (r"\__", r"_", false),
+            (r"\\__", r"_", false),
+            (r"abc", r"_", false),
+            (r"a_c", r"_", false),
+            (r"a\bc", r"_", false),
+            (r"a\_c", r"_", false),
+            (r"%abc", r"_", false),
+            (r"\%abc", r"_", false),
+            (r"a\\_c%", r"_", false),
+            (r"", r"\_", false),
+            (r"\", r"\_", false),
+            (r"\\", r"\_", false),
+            (r"\\\", r"\_", false),
+            (r"\\\\", r"\_", false),
+            (r"a", r"\_", false),
+            (r"\a", r"\_", false),
+            (r"\\a", r"\_", false),
+            (r"%", r"\_", false),
+            (r"\%", r"\_", false),
+            (r"\\%", r"\_", false),
+            (r"%%", r"\_", false),
+            (r"\%%", r"\_", false),
+            (r"\\%%", r"\_", false),
+            (r"_", r"\_", true),
+            (r"\_", r"\_", false),
+            (r"\\_", r"\_", false),
+            (r"__", r"\_", false),
+            (r"\__", r"\_", false),
+            (r"\\__", r"\_", false),
+            (r"abc", r"\_", false),
+            (r"a_c", r"\_", false),
+            (r"a\bc", r"\_", false),
+            (r"a\_c", r"\_", false),
+            (r"%abc", r"\_", false),
+            (r"\%abc", r"\_", false),
+            (r"a\\_c%", r"\_", false),
+            (r"", r"\\_", false),
+            (r"\", r"\\_", false),
+            (r"\\", r"\\_", true),
+            (r"\\\", r"\\_", false),
+            (r"\\\\", r"\\_", false),
+            (r"a", r"\\_", false),
+            (r"\a", r"\\_", true),
+            (r"\\a", r"\\_", false),
+            (r"%", r"\\_", false),
+            (r"\%", r"\\_", true),
+            (r"\\%", r"\\_", false),
+            (r"%%", r"\\_", false),
+            (r"\%%", r"\\_", false),
+            (r"\\%%", r"\\_", false),
+            (r"_", r"\\_", false),
+            (r"\_", r"\\_", true),
+            (r"\\_", r"\\_", false),
+            (r"__", r"\\_", false),
+            (r"\__", r"\\_", false),
+            (r"\\__", r"\\_", false),
+            (r"abc", r"\\_", false),
+            (r"a_c", r"\\_", false),
+            (r"a\bc", r"\\_", false),
+            (r"a\_c", r"\\_", false),
+            (r"%abc", r"\\_", false),
+            (r"\%abc", r"\\_", false),
+            (r"a\\_c%", r"\\_", false),
+            (r"", r"__", false),
+            (r"\", r"__", false),
+            (r"\\", r"__", true),
+            (r"\\\", r"__", false),
+            (r"\\\\", r"__", false),
+            (r"a", r"__", false),
+            (r"\a", r"__", true),
+            (r"\\a", r"__", false),
+            (r"%", r"__", false),
+            (r"\%", r"__", true),
+            (r"\\%", r"__", false),
+            (r"%%", r"__", true),
+            (r"\%%", r"__", false),
+            (r"\\%%", r"__", false),
+            (r"_", r"__", false),
+            (r"\_", r"__", true),
+            (r"\\_", r"__", false),
+            (r"__", r"__", true),
+            (r"\__", r"__", false),
+            (r"\\__", r"__", false),
+            (r"abc", r"__", false),
+            (r"a_c", r"__", false),
+            (r"a\bc", r"__", false),
+            (r"a\_c", r"__", false),
+            (r"%abc", r"__", false),
+            (r"\%abc", r"__", false),
+            (r"a\\_c%", r"__", false),
+            (r"", r"\__", false),
+            (r"\", r"\__", false),
+            (r"\\", r"\__", false),
+            (r"\\\", r"\__", false),
+            (r"\\\\", r"\__", false),
+            (r"a", r"\__", false),
+            (r"\a", r"\__", false),
+            (r"\\a", r"\__", false),
+            (r"%", r"\__", false),
+            (r"\%", r"\__", false),
+            (r"\\%", r"\__", false),
+            (r"%%", r"\__", false),
+            (r"\%%", r"\__", false),
+            (r"\\%%", r"\__", false),
+            (r"_", r"\__", false),
+            (r"\_", r"\__", false),
+            (r"\\_", r"\__", false),
+            (r"__", r"\__", true),
+            (r"\__", r"\__", false),
+            (r"\\__", r"\__", false),
+            (r"abc", r"\__", false),
+            (r"a_c", r"\__", false),
+            (r"a\bc", r"\__", false),
+            (r"a\_c", r"\__", false),
+            (r"%abc", r"\__", false),
+            (r"\%abc", r"\__", false),
+            (r"a\\_c%", r"\__", false),
+            (r"", r"\\__", false),
+            (r"\", r"\\__", false),
+            (r"\\", r"\\__", false),
+            (r"\\\", r"\\__", true),
+            (r"\\\\", r"\\__", false),
+            (r"a", r"\\__", false),
+            (r"\a", r"\\__", false),
+            (r"\\a", r"\\__", true),
+            (r"%", r"\\__", false),
+            (r"\%", r"\\__", false),
+            (r"\\%", r"\\__", true),
+            (r"%%", r"\\__", false),
+            (r"\%%", r"\\__", true),
+            (r"\\%%", r"\\__", false),
+            (r"_", r"\\__", false),
+            (r"\_", r"\\__", false),
+            (r"\\_", r"\\__", true),
+            (r"__", r"\\__", false),
+            (r"\__", r"\\__", true),
+            (r"\\__", r"\\__", false),
+            (r"abc", r"\\__", false),
+            (r"a_c", r"\\__", false),
+            (r"a\bc", r"\\__", false),
+            (r"a\_c", r"\\__", false),
+            (r"%abc", r"\\__", false),
+            (r"\%abc", r"\\__", false),
+            (r"a\\_c%", r"\\__", false),
+            (r"", r"abc", false),
+            (r"\", r"abc", false),
+            (r"\\", r"abc", false),
+            (r"\\\", r"abc", false),
+            (r"\\\\", r"abc", false),
+            (r"a", r"abc", false),
+            (r"\a", r"abc", false),
+            (r"\\a", r"abc", false),
+            (r"%", r"abc", false),
+            (r"\%", r"abc", false),
+            (r"\\%", r"abc", false),
+            (r"%%", r"abc", false),
+            (r"\%%", r"abc", false),
+            (r"\\%%", r"abc", false),
+            (r"_", r"abc", false),
+            (r"\_", r"abc", false),
+            (r"\\_", r"abc", false),
+            (r"__", r"abc", false),
+            (r"\__", r"abc", false),
+            (r"\\__", r"abc", false),
+            (r"abc", r"abc", true),
+            (r"a_c", r"abc", false),
+            (r"a\bc", r"abc", false),
+            (r"a\_c", r"abc", false),
+            (r"%abc", r"abc", false),
+            (r"\%abc", r"abc", false),
+            (r"a\\_c%", r"abc", false),
+            (r"", r"a_c", false),
+            (r"\", r"a_c", false),
+            (r"\\", r"a_c", false),
+            (r"\\\", r"a_c", false),
+            (r"\\\\", r"a_c", false),
+            (r"a", r"a_c", false),
+            (r"\a", r"a_c", false),
+            (r"\\a", r"a_c", false),
+            (r"%", r"a_c", false),
+            (r"\%", r"a_c", false),
+            (r"\\%", r"a_c", false),
+            (r"%%", r"a_c", false),
+            (r"\%%", r"a_c", false),
+            (r"\\%%", r"a_c", false),
+            (r"_", r"a_c", false),
+            (r"\_", r"a_c", false),
+            (r"\\_", r"a_c", false),
+            (r"__", r"a_c", false),
+            (r"\__", r"a_c", false),
+            (r"\\__", r"a_c", false),
+            (r"abc", r"a_c", true),
+            (r"a_c", r"a_c", true),
+            (r"a\bc", r"a_c", false),
+            (r"a\_c", r"a_c", false),
+            (r"%abc", r"a_c", false),
+            (r"\%abc", r"a_c", false),
+            (r"a\\_c%", r"a_c", false),
+            (r"", r"a\bc", false),
+            (r"\", r"a\bc", false),
+            (r"\\", r"a\bc", false),
+            (r"\\\", r"a\bc", false),
+            (r"\\\\", r"a\bc", false),
+            (r"a", r"a\bc", false),
+            (r"\a", r"a\bc", false),
+            (r"\\a", r"a\bc", false),
+            (r"%", r"a\bc", false),
+            (r"\%", r"a\bc", false),
+            (r"\\%", r"a\bc", false),
+            (r"%%", r"a\bc", false),
+            (r"\%%", r"a\bc", false),
+            (r"\\%%", r"a\bc", false),
+            (r"_", r"a\bc", false),
+            (r"\_", r"a\bc", false),
+            (r"\\_", r"a\bc", false),
+            (r"__", r"a\bc", false),
+            (r"\__", r"a\bc", false),
+            (r"\\__", r"a\bc", false),
+            (r"abc", r"a\bc", true),
+            (r"a_c", r"a\bc", false),
+            (r"a\bc", r"a\bc", false),
+            (r"a\_c", r"a\bc", false),
+            (r"%abc", r"a\bc", false),
+            (r"\%abc", r"a\bc", false),
+            (r"a\\_c%", r"a\bc", false),
+            (r"", r"a\_c", false),
+            (r"\", r"a\_c", false),
+            (r"\\", r"a\_c", false),
+            (r"\\\", r"a\_c", false),
+            (r"\\\\", r"a\_c", false),
+            (r"a", r"a\_c", false),
+            (r"\a", r"a\_c", false),
+            (r"\\a", r"a\_c", false),
+            (r"%", r"a\_c", false),
+            (r"\%", r"a\_c", false),
+            (r"\\%", r"a\_c", false),
+            (r"%%", r"a\_c", false),
+            (r"\%%", r"a\_c", false),
+            (r"\\%%", r"a\_c", false),
+            (r"_", r"a\_c", false),
+            (r"\_", r"a\_c", false),
+            (r"\\_", r"a\_c", false),
+            (r"__", r"a\_c", false),
+            (r"\__", r"a\_c", false),
+            (r"\\__", r"a\_c", false),
+            (r"abc", r"a\_c", false),
+            (r"a_c", r"a\_c", true),
+            (r"a\bc", r"a\_c", false),
+            (r"a\_c", r"a\_c", false),
+            (r"%abc", r"a\_c", false),
+            (r"\%abc", r"a\_c", false),
+            (r"a\\_c%", r"a\_c", false),
+            (r"", r"%abc", false),
+            (r"\", r"%abc", false),
+            (r"\\", r"%abc", false),
+            (r"\\\", r"%abc", false),
+            (r"\\\\", r"%abc", false),
+            (r"a", r"%abc", false),
+            (r"\a", r"%abc", false),
+            (r"\\a", r"%abc", false),
+            (r"%", r"%abc", false),
+            (r"\%", r"%abc", false),
+            (r"\\%", r"%abc", false),
+            (r"%%", r"%abc", false),
+            (r"\%%", r"%abc", false),
+            (r"\\%%", r"%abc", false),
+            (r"_", r"%abc", false),
+            (r"\_", r"%abc", false),
+            (r"\\_", r"%abc", false),
+            (r"__", r"%abc", false),
+            (r"\__", r"%abc", false),
+            (r"\\__", r"%abc", false),
+            (r"abc", r"%abc", true),
+            (r"a_c", r"%abc", false),
+            (r"a\bc", r"%abc", false),
+            (r"a\_c", r"%abc", false),
+            (r"%abc", r"%abc", true),
+            (r"\%abc", r"%abc", true),
+            (r"a\\_c%", r"%abc", false),
+            (r"", r"\%abc", false),
+            (r"\", r"\%abc", false),
+            (r"\\", r"\%abc", false),
+            (r"\\\", r"\%abc", false),
+            (r"\\\\", r"\%abc", false),
+            (r"a", r"\%abc", false),
+            (r"\a", r"\%abc", false),
+            (r"\\a", r"\%abc", false),
+            (r"%", r"\%abc", false),
+            (r"\%", r"\%abc", false),
+            (r"\\%", r"\%abc", false),
+            (r"%%", r"\%abc", false),
+            (r"\%%", r"\%abc", false),
+            (r"\\%%", r"\%abc", false),
+            (r"_", r"\%abc", false),
+            (r"\_", r"\%abc", false),
+            (r"\\_", r"\%abc", false),
+            (r"__", r"\%abc", false),
+            (r"\__", r"\%abc", false),
+            (r"\\__", r"\%abc", false),
+            (r"abc", r"\%abc", false),
+            (r"a_c", r"\%abc", false),
+            (r"a\bc", r"\%abc", false),
+            (r"a\_c", r"\%abc", false),
+            (r"%abc", r"\%abc", true),
+            (r"\%abc", r"\%abc", false),
+            (r"a\\_c%", r"\%abc", false),
+            (r"", r"a\\_c%", false),
+            (r"\", r"a\\_c%", false),
+            (r"\\", r"a\\_c%", false),
+            (r"\\\", r"a\\_c%", false),
+            (r"\\\\", r"a\\_c%", false),
+            (r"a", r"a\\_c%", false),
+            (r"\a", r"a\\_c%", false),
+            (r"\\a", r"a\\_c%", false),
+            (r"%", r"a\\_c%", false),
+            (r"\%", r"a\\_c%", false),
+            (r"\\%", r"a\\_c%", false),
+            (r"%%", r"a\\_c%", false),
+            (r"\%%", r"a\\_c%", false),
+            (r"\\%%", r"a\\_c%", false),
+            (r"_", r"a\\_c%", false),
+            (r"\_", r"a\\_c%", false),
+            (r"\\_", r"a\\_c%", false),
+            (r"__", r"a\\_c%", false),
+            (r"\__", r"a\\_c%", false),
+            (r"\\__", r"a\\_c%", false),
+            (r"abc", r"a\\_c%", false),
+            (r"a_c", r"a\\_c%", false),
+            (r"a\bc", r"a\\_c%", true),
+            (r"a\_c", r"a\\_c%", true),
+            (r"%abc", r"a\\_c%", false),
+            (r"\%abc", r"a\\_c%", false),
+            (r"a\\_c%", r"a\\_c%", false),
+        ];
+
+        let values = test_cases
+            .iter()
+            .map(|(value, _, _)| *value)
+            .collect::<Vec<_>>();
+        let patterns = test_cases
+            .iter()
+            .map(|(_, pattern, _)| *pattern)
+            .collect::<Vec<_>>();
+        let expected = BooleanArray::from(
+            test_cases
+                .iter()
+                .map(|(_, _, expected)| *expected)
+                .collect::<Vec<_>>(),
+        );
+        let unexpected = BooleanArray::from(
+            test_cases
+                .iter()
+                .map(|(_, _, expected)| !*expected)
+                .collect::<Vec<_>>(),
+        );
+
+        for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
+            let values = make_array(values.iter(), &string_type);
+            let patterns = make_array(patterns.iter(), &string_type);
+            let (values, patterns) = (values.as_ref(), patterns.as_ref());
+
+            assert_eq!(like(&values, &patterns).unwrap(), expected,);
+            assert_eq!(ilike(&values, &patterns).unwrap(), expected,);
+            assert_eq!(nlike(&values, &patterns).unwrap(), unexpected,);
+            assert_eq!(nilike(&values, &patterns).unwrap(), unexpected,);
+        }
+    }
+
+    fn make_datums(
+        value: impl AsRef<str>,
+        data_type: &DataType,
+    ) -> Vec<(Box<dyn Datum>, DatumType)> {
+        match data_type {
+            DataType::Utf8 => {
+                let array = StringArray::from_iter_values([value]);
+                vec![
+                    (Box::new(array.clone()), DatumType::Array),
+                    (Box::new(Scalar::new(array)), DatumType::Scalar),
+                ]
+            }
+            DataType::LargeUtf8 => {
+                let array = LargeStringArray::from_iter_values([value]);
+                vec![
+                    (Box::new(array.clone()), DatumType::Array),
+                    (Box::new(Scalar::new(array)), DatumType::Scalar),
+                ]
+            }
+            DataType::Utf8View => {
+                let array = StringViewArray::from_iter_values([value]);
+                vec![
+                    (Box::new(array.clone()), DatumType::Array),
+                    (Box::new(Scalar::new(array)), DatumType::Scalar),
+                ]
+            }
+            _ => unimplemented!(),
+        }
+    }
+
+    fn make_array(
+        values: impl IntoIterator<Item: AsRef<str>>,
+        data_type: &DataType,
+    ) -> Box<dyn Array> {
+        match data_type {
+            DataType::Utf8 => Box::new(StringArray::from_iter_values(values)),
+            DataType::LargeUtf8 => Box::new(LargeStringArray::from_iter_values(values)),
+            DataType::Utf8View => Box::new(StringViewArray::from_iter_values(values)),
+            _ => unimplemented!(),
+        }
+    }
+
+    #[derive(Debug)]
+    enum DatumType {
+        Array,
+        Scalar,
+    }
 }
diff --git a/arrow-string/src/predicate.rs b/arrow-string/src/predicate.rs
index ae2493692d..8bbfe65bab 100644
--- a/arrow-string/src/predicate.rs
+++ b/arrow-string/src/predicate.rs
@@ -18,7 +18,7 @@
 use arrow_array::{Array, ArrayAccessor, BooleanArray, StringViewArray};
 use arrow_buffer::BooleanBuffer;
 use arrow_schema::ArrowError;
-use memchr::memchr2;
+use memchr::memchr3;
 use memchr::memmem::Finder;
 use regex::{Regex, RegexBuilder};
 use std::iter::zip;
@@ -45,16 +45,12 @@ impl<'a> Predicate<'a> {
     pub fn like(pattern: &'a str) -> Result<Self, ArrowError> {
         if !contains_like_pattern(pattern) {
             Ok(Self::Eq(pattern))
-        } else if pattern.ends_with('%')
-            && !pattern.ends_with("\\%")
-            && !contains_like_pattern(&pattern[..pattern.len() - 1])
-        {
+        } else if pattern.ends_with('%') && !contains_like_pattern(&pattern[..pattern.len() - 1]) {
             Ok(Self::StartsWith(&pattern[..pattern.len() - 1]))
         } else if pattern.starts_with('%') && !contains_like_pattern(&pattern[1..]) {
             Ok(Self::EndsWith(&pattern[1..]))
         } else if pattern.starts_with('%')
             && pattern.ends_with('%')
-            && !pattern.ends_with("\\%")
             && !contains_like_pattern(&pattern[1..pattern.len() - 1])
         {
             Ok(Self::contains(&pattern[1..pattern.len() - 1]))
@@ -262,12 +258,16 @@ fn regex_like(pattern: &str, case_insensitive: bool) -> Result<Regex, ArrowError
         match c {
             '\\' => {
                 match chars_iter.peek() {
-                    Some(next) if is_like_pattern(*next) => {
-                        result.push(*next);
+                    Some(&next) => {
+                        if regex_syntax::is_meta_character(next) {
+                            result.push('\\');
+                        }
+                        result.push(next);
                         // Skipping the next char as it is already appended
                         chars_iter.next();
                     }
-                    _ => {
+                    None => {
+                        // Trailing backslash in the pattern. E.g. PostgreSQL and Trino treat it as an error, but e.g. Snowflake treats it as a literal backslash
                         result.push('\\');
                         result.push('\\');
                     }
@@ -301,12 +301,8 @@ fn regex_like(pattern: &str, case_insensitive: bool) -> Result<Regex, ArrowError
         })
 }
 
-fn is_like_pattern(c: char) -> bool {
-    c == '%' || c == '_'
-}
-
 fn contains_like_pattern(pattern: &str) -> bool {
-    memchr2(b'%', b'_', pattern.as_bytes()).is_some()
+    memchr3(b'%', b'_', b'\\', pattern.as_bytes()).is_some()
 }
 
 #[cfg(test)]
@@ -314,51 +310,32 @@ mod tests {
     use super::*;
 
     #[test]
-    fn test_replace_start_end_percent() {
-        let a_eq = "%foobar%";
-        let expected = "foobar";
-        let r = regex_like(a_eq, false).unwrap();
-        assert_eq!(r.to_string(), expected);
-    }
-
-    #[test]
-    fn test_replace_middle_percent() {
-        let a_eq = "foo%bar";
-        let expected = "^foo.*bar$";
-        let r = regex_like(a_eq, false).unwrap();
-        assert_eq!(r.to_string(), expected);
-    }
-
-    #[test]
-    fn test_replace_underscore() {
-        let a_eq = "foo_bar";
-        let expected = "^foo.bar$";
-        let r = regex_like(a_eq, false).unwrap();
-        assert_eq!(r.to_string(), expected);
-    }
-
-    #[test]
-    fn test_replace_like_wildcards_leave_like_meta_chars() {
-        let a_eq = "\\%\\_";
-        let expected = "^%_$";
-        let r = regex_like(a_eq, false).unwrap();
-        assert_eq!(r.to_string(), expected);
-    }
-
-    #[test]
-    fn test_replace_like_wildcards_with_multiple_escape_chars() {
-        let a_eq = "\\\\%";
-        let expected = "^\\\\%$";
-        let r = regex_like(a_eq, false).unwrap();
-        assert_eq!(r.to_string(), expected);
-    }
-
-    #[test]
-    fn test_replace_like_wildcards_escape_regex_meta_char() {
-        let a_eq = ".";
-        let expected = "^\\.$";
-        let r = regex_like(a_eq, false).unwrap();
-        assert_eq!(r.to_string(), expected);
+    fn test_regex_like() {
+        let test_cases = [
+            // %..%
+            (r"%foobar%", r"foobar"),
+            // ..%..
+            (r"foo%bar", r"^foo.*bar$"),
+            // .._..
+            (r"foo_bar", r"^foo.bar$"),
+            // escaped wildcards
+            (r"\%\_", r"^%_$"),
+            // escaped non-wildcard
+            (r"\a", r"^a$"),
+            // escaped escape and wildcard
+            (r"\\%", r"^\\"),
+            // escaped escape and non-wildcard
+            (r"\\a", r"^\\a$"),
+            // regex meta character
+            (r".", r"^\.$"),
+            (r"$", r"^\$$"),
+            (r"\\", r"^\\$"),
+        ];
+
+        for (like_pattern, expected_regexp) in test_cases {
+            let r = regex_like(like_pattern, false).unwrap();
+            assert_eq!(r.to_string(), expected_regexp);
+        }
     }
 
     #[test]

Reply via email to