This is an automated email from the ASF dual-hosted git repository.

mbudiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new b91c0c00d2 [CALCITE-6309] Add REGEXP_LIKE function (enabled in MySQL, 
Oracle, PostgreSQL and Spark libraries)
b91c0c00d2 is described below

commit b91c0c00d2cd05db56c775ae335e4933a224b051
Author: James Duong <[email protected]>
AuthorDate: Tue Mar 12 13:33:30 2024 -0700

    [CALCITE-6309] Add REGEXP_LIKE function (enabled in MySQL, Oracle, 
PostgreSQL and Spark libraries)
    
    Supports a 3-argument REGEXP_LIKE variant that takes in a string of regex 
flags in
    addition to the existing 2-argument variant.
    
    Add support for REGEXP_LIKE(value, regex) to MySQL, Oracle, and PostgreSQL.
    Add support for REGEXP_LIKE(value, regex, flags) to MySQL, Oracle, 
PostgreSQL, and Spark.
    
    Fix RLIKE-related tests such that they validate results instead of
    only checking that expressions parse correctly and return the correct
    type.
---
 .../calcite/adapter/enumerable/RexImpTable.java    |  2 +-
 .../org/apache/calcite/runtime/SqlFunctions.java   | 23 +++++++++++-
 .../calcite/sql/fun/SqlLibraryOperators.java       |  4 +-
 .../org/apache/calcite/util/BuiltInMethod.java     |  2 +
 site/_docs/reference.md                            |  2 +-
 .../org/apache/calcite/test/SqlOperatorTest.java   | 43 +++++++++++++++++++---
 6 files changed, 66 insertions(+), 10 deletions(-)

diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 37fdd4acf3..c622af252e 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -601,7 +601,7 @@ public class RexImpTable {
       defineReflective(PARSE_URL, BuiltInMethod.PARSE_URL2.method,
           BuiltInMethod.PARSE_URL3.method);
       defineReflective(REGEXP, BuiltInMethod.RLIKE.method);
-      defineReflective(REGEXP_LIKE, BuiltInMethod.RLIKE.method);
+      defineReflective(REGEXP_LIKE, BuiltInMethod.RLIKE.method, 
BuiltInMethod.REGEXP_LIKE3.method);
       defineReflective(REGEXP_CONTAINS, BuiltInMethod.REGEXP_CONTAINS.method);
       defineReflective(REGEXP_EXTRACT, BuiltInMethod.REGEXP_EXTRACT2.method,
           BuiltInMethod.REGEXP_EXTRACT3.method, 
BuiltInMethod.REGEXP_EXTRACT4.method);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java 
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 54a9571907..523c2d00e5 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -401,10 +401,16 @@ public class SqlFunctions {
     /** Validate regex arguments in REGEXP_* fns, throws an exception
      * for invalid regex patterns, else returns a Pattern object. */
     private Pattern validateRegexPattern(String regex, String methodName) {
+      return validateRegexPattern(regex, methodName, 0);
+    }
+
+    /** Validate regex arguments in REGEXP_* fns, throws an exception
+     * for invalid regex patterns, else returns a Pattern object. */
+    private Pattern validateRegexPattern(String regex, String methodName, int 
flags) {
       try {
         // Uses java.util.regex as a standard for regex processing
         // in Calcite instead of RE2 used by BigQuery/GoogleSQL
-        return cache.getUnchecked(new Key(0, regex));
+        return cache.getUnchecked(new Key(flags, regex));
       } catch (UncheckedExecutionException e) {
         if (e.getCause() instanceof PatternSyntaxException) {
           throw RESOURCE.invalidRegexInputForRegexpFunctions(
@@ -486,6 +492,14 @@ public class SqlFunctions {
       return pattern.matcher(value).find();
     }
 
+    /** SQL {@code REGEXP_LIKE(value, regexp, flags)} function.
+     * Throws a runtime exception for invalid regular expressions. */
+    @SuppressWarnings("unused")
+    public boolean regexpLike(String value, String regex, String stringFlags) {
+      final Pattern pattern =
+          validateRegexPattern(regex, "REGEXP_LIKE", 
makeRegexpFlags(stringFlags));
+      return pattern.matcher(value).find();
+    }
     /** SQL {@code REGEXP_EXTRACT(value, regexp)} function.
      * Returns NULL if there is no match. Returns an exception if regex is 
invalid.
      * Uses position=1 and occurrence=1 as default values when not specified. 
*/
@@ -680,8 +694,15 @@ public class SqlFunctions {
           flags |= Pattern.DOTALL;
           break;
         case 'm':
+          // PostgreSQL should actually interpret m to be a synonym for n, but 
this is
+          // relaxed for consistency.
           flags |= Pattern.MULTILINE;
           break;
+        case 's':
+          // This flag is in PostgreSQL but doesn't apply to other libraries. 
This is relaxed
+          // for consistency.
+          flags &= ~Pattern.DOTALL;
+          break;
         default:
           throw RESOURCE.invalidInputForRegexpReplace(stringFlags).ex();
         }
diff --git 
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java 
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index 5f27b503d5..681671af8f 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -557,10 +557,10 @@ public abstract class SqlLibraryOperators {
           SqlFunctionCategory.STRING);
 
   /** The "REGEXP_LIKE(value, regexp)" function, equivalent to {@link #RLIKE}. 
*/
-  @LibraryOperator(libraries = {SPARK})
+  @LibraryOperator(libraries = {SPARK, MYSQL, POSTGRESQL, ORACLE})
   public static final SqlFunction REGEXP_LIKE =
       SqlBasicFunction.create("REGEXP_LIKE", ReturnTypes.BOOLEAN_NULLABLE,
-          OperandTypes.STRING_STRING,
+          OperandTypes.STRING_STRING_OPTIONAL_STRING,
           SqlFunctionCategory.STRING);
 
   @LibraryOperator(libraries = {MYSQL})
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java 
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index b25a29264f..53d4d33c16 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -563,6 +563,8 @@ public enum BuiltInMethod {
       String.class, String.class, int.class, int.class),
   REGEXP_INSTR5(SqlFunctions.RegexFunction.class, "regexpInstr",
       String.class, String.class, int.class, int.class, int.class),
+  REGEXP_LIKE3(SqlFunctions.RegexFunction.class, "regexpLike",
+      String.class, String.class, String.class),
   REGEXP_REPLACE3(SqlFunctions.RegexFunction.class, "regexpReplace",
       String.class, String.class, String.class),
   REGEXP_REPLACE4(SqlFunctions.RegexFunction.class, "regexpReplace",
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index 542b1f6af0..ea9b3759a7 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2821,7 +2821,7 @@ In the following:
 | b | REGEXP_EXTRACT(string, regexp [, position [, occurrence]]) | Returns the 
substring in *string* that matches the *regexp*, starting search at *position* 
(default 1), and until locating the nth *occurrence* (default 1). Returns NULL 
if there is no match
 | b | REGEXP_EXTRACT_ALL(string, regexp)             | Returns an array of all 
substrings in *string* that matches the *regexp*. Returns an empty array if 
there is no match
 | b | REGEXP_INSTR(string, regexp [, position [, occurrence [, 
occurrence_position]]]) | Returns the lowest 1-based position of the substring 
in *string* that matches the *regexp*, starting search at *position* (default 
1), and until locating the nth *occurrence* (default 1). Setting 
occurrence_position (default 0) to 1 returns the end position of substring + 1. 
Returns 0 if there is no match
-| s | REGEXP_LIKE(string, regexp)                    | Equivalent to `string1 
RLIKE string2`
+| m o p s | REGEXP_LIKE(string, regexp [, flags])        | Equivalent to 
`string1 RLIKE string2` with an optional parameter for search flags. Supported 
flags are: <ul><li>i: case-insensitive matching</li><li>c: case-sensitive 
matching</li><li>n: newline-sensitive matching</li><li>s: non-newline-sensitive 
matching</li><li>m: multi-line</li></ul>
 | b m o | REGEXP_REPLACE(string, regexp, rep [, pos [, occurrence [, 
matchType]]]) | Replaces all substrings of *string* that match *regexp* with 
*rep* at the starting *pos* in expr (if omitted, the default is 1), 
*occurrence* specifies which occurrence of a match to search for (if omitted, 
the default is 1), *matchType* specifies how to perform matching
 | b | REGEXP_SUBSTR(string, regexp [, position [, occurrence]]) | Synonym for 
REGEXP_EXTRACT
 | b m p s | REPEAT(string, integer)                  | Returns a string 
consisting of *string* repeated of *integer* times; returns an empty string if 
*integer* is less than 1
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java 
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index 9c5535eb67..9f3c10e8b7 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -3553,15 +3553,21 @@ public class SqlOperatorTest {
   }
 
   @Test void testRlikeOperator() {
-    final SqlOperatorFixture f = fixture().setFor(SqlLibraryOperators.RLIKE, 
VM_EXPAND);
+    final SqlOperatorFixture f = fixture()
+        .setFor(SqlLibraryOperators.RLIKE, VM_EXPAND);
     checkRlikeFunc(f, SqlLibrary.HIVE, SqlLibraryOperators.RLIKE);
     checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.RLIKE);
     checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.REGEXP);
-    checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.REGEXP_LIKE);
     checkNotRlikeFunc(f.withLibrary(SqlLibrary.HIVE));
     checkNotRlikeFunc(f.withLibrary(SqlLibrary.SPARK));
     checkRlikeFails(f.withLibrary(SqlLibrary.MYSQL));
     checkRlikeFails(f.withLibrary(SqlLibrary.ORACLE));
+
+    f.setFor(SqlLibraryOperators.REGEXP_LIKE, VM_EXPAND);
+    checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.REGEXP_LIKE);
+    checkRlikeFunc(f, SqlLibrary.POSTGRESQL, SqlLibraryOperators.REGEXP_LIKE);
+    checkRlikeFunc(f, SqlLibrary.MYSQL, SqlLibraryOperators.REGEXP_LIKE);
+    checkRlikeFunc(f, SqlLibrary.ORACLE, SqlLibraryOperators.REGEXP_LIKE);
   }
 
   void checkRlikeFunc(SqlOperatorFixture f0, SqlLibrary library, SqlOperator 
operator) {
@@ -3654,9 +3660,8 @@ public class SqlOperatorTest {
   }
 
   @Test void testIlikeEscape() {
-    final SqlOperatorFixture f =
-        fixture().setFor(SqlLibraryOperators.ILIKE, VmName.EXPAND)
-            .withLibrary(SqlLibrary.POSTGRESQL);
+    final SqlOperatorFixture f = fixture().setFor(SqlLibraryOperators.ILIKE, 
VmName.EXPAND)
+        .withLibrary(SqlLibrary.POSTGRESQL);
     f.checkBoolean("'a_c' ilike 'a#_C' escape '#'", true);
     f.checkBoolean("'axc' ilike 'a#_C' escape '#'", false);
     f.checkBoolean("'a_c' ilike 'a\\_C' escape '\\'", true);
@@ -3742,6 +3747,34 @@ public class SqlOperatorTest {
     f1.checkBoolean("'ab\ncd\nef' ilike '%cde%'", false);
   }
 
+  /** Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-6309";>[CALCITE-6309]
+   * Add REGEXP_LIKE function (enabled in MySQL, Oracle, PostgreSQL and Spark 
libraries)</a>. */
+  @Test void testRegexpLike3() {
+    final SqlOperatorFixture f = fixture();
+    f.setFor(SqlLibraryOperators.REGEXP_LIKE, VmName.EXPAND);
+
+    final Consumer<SqlOperatorFixture> consumer = f1 -> {
+      f1.checkBoolean("REGEXP_LIKE('teststr', 'TEST', 'i')", true);
+      f1.checkBoolean("REGEXP_LIKE('ateststr', 'TEST', 'c')", false);
+      f1.checkBoolean("REGEXP_LIKE('atest\nstr', 'test.str', '')", false);
+      f1.checkBoolean("REGEXP_LIKE('atest\nstr', 'test.str', 'n')", true);
+      f1.checkBoolean("REGEXP_LIKE('atest\nstr', 'TEST.str', 'in')", true);
+      f1.checkBoolean("REGEXP_LIKE('ateststring', 'teststr', '')", true);
+      f1.checkBoolean("REGEXP_LIKE('ateststring', 'TESTstr', 'ic')", false);
+      f1.checkBoolean("REGEXP_LIKE('ateststring', 'TESTstr', 'ci')", true);
+      f1.checkBoolean("REGEXP_LIKE('atest\nstr', 'test.str', 's')", false);
+      f1.checkBoolean("REGEXP_LIKE('atest\nstr', 'test.str', 'ns')", false);
+      f1.checkBoolean("REGEXP_LIKE('atest\nstr', 'test.str', 'sn')", true);
+      f1.checkNull("REGEXP_LIKE(NULL, 'test.str', 'sn')");
+      f1.checkNull("REGEXP_LIKE('atest\nstr', NULL, 'sn')");
+      f1.checkNull("REGEXP_LIKE('atest\nstr', 'test.str', NULL)");
+    };
+    f.forEachLibrary(
+        list(SqlLibrary.MYSQL, SqlLibrary.SPARK,
+        SqlLibrary.POSTGRESQL, SqlLibrary.ORACLE), consumer);
+  }
+
   /** Test case for
    * <a 
href="https://issues.apache.org/jira/browse/CALCITE-1898";>[CALCITE-1898]
    * LIKE must match '.' (period) literally</a>. */

Reply via email to