Anthrino commented on code in PR #3387:
URL: https://github.com/apache/calcite/pull/3387#discussion_r1310898531


##########
core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java:
##########
@@ -347,19 +347,119 @@ public static String sha512(ByteString string)  {
   }
 
   /** SQL {@code REGEXP_CONTAINS(value, regexp)} function.
-   * Throws a runtime exception for invalid regular expressions.*/
+   * Throws a runtime exception for invalid regular expressions. */
   public static boolean regexpContains(String value, String regex) {
     try {
       // Uses java.util.regex as a standard for regex processing
       // in Calcite instead of RE2 used by BigQuery/GoogleSQL
       Pattern regexp = Pattern.compile(regex);
       return regexp.matcher(value).find();
     } catch (PatternSyntaxException ex) {
-      throw 
RESOURCE.invalidInputForRegexpContains(ex.getMessage().replace("\r\n", " ")
-          .replace("\n", " ").replace("\r", " ")).ex();
+      throw RESOURCE.invalidRegexInputForRegexpFunctions(ex.getMessage()
+          .replace(System.lineSeparator(), " "), "REGEXP_CONTAINS").ex();
     }
   }
 
+  /** SQL {@code REGEXP_EXTRACT(value, regexp)} function.
+   *  Returns NULL if there is no match. Returns an exception if regex is 
invalid.
+   *  Uses position=1 and occurrence=1 as default values when not specified. */
+  public static @Nullable String regexpExtract(String value, String regex) {
+    return regexpExtract(value, regex, 1, 1);
+  }
+
+  /** SQL {@code REGEXP_EXTRACT(value, regexp, position)} function.
+   *  Returns NULL if there is no match, or if position is beyond range.
+   *  Returns an exception if regex or position is invalid.
+   *  Uses occurrence=1 as default value when not specified. */
+  public static @Nullable String regexpExtract(String value, String regex, int 
position) {
+    return regexpExtract(value, regex, position, 1);
+  }
+
+  /** SQL {@code REGEXP_EXTRACT(value, regexp, position, occurrence)} function.
+   *  Returns NULL if there is no match, or if position or occurrence are 
beyond range.
+   *  Returns an exception if regex, position or occurrence are invalid. */
+  public static @Nullable String regexpExtract(String value, String regex, int 
position,
+      int occurrence) {
+    // Uses java.util.regex as a standard for regex processing
+    // in Calcite instead of RE2 used by BigQuery/GoogleSQL
+    Pattern regexp;
+    String methodName = "REGEXP_EXTRACT";
+    try {
+      regexp = Pattern.compile(regex);
+    } catch (PatternSyntaxException ex) {
+      throw RESOURCE.invalidRegexInputForRegexpFunctions(ex.getMessage()
+          .replace(System.lineSeparator(), " "), methodName).ex();
+    }
+
+    Matcher matcher = regexp.matcher(value);
+
+    if (position <= 0) {
+      throw 
RESOURCE.invalidIntegerInputForRegexpFunctions(Integer.toString(position),
+          "position", methodName).ex();
+    }
+    if (position > value.length()) {
+      return null;
+    }
+    matcher.region(position - 1, value.length());
+
+    if (occurrence <= 0) {
+      throw 
RESOURCE.invalidIntegerInputForRegexpFunctions(Integer.toString(occurrence),
+          "occurrence", methodName).ex();
+    }
+
+    if (matcher.groupCount() > 1) {
+      throw RESOURCE.multipleCapturingGroupsForRegexpExtract(
+          Integer.toString(matcher.groupCount()), methodName).ex();
+    }
+
+    String match = null;
+    while (occurrence > 0) {
+      if (matcher.find()) {
+        if (matcher.groupCount() == 1) {
+          match = matcher.group(1);
+        } else {
+          match = matcher.group(0);
+        }
+      } else {
+        return null;
+      }
+      occurrence--;
+    }
+
+    return match;
+  }
+
+  /** SQL {@code REGEXP_EXTRACT_ALL(value, regexp)} function.
+   *  Returns an empty array if there is no match, returns an exception if 
regex is invalid.*/
+  public static List<String> regexpExtractAll(String value, String regex) {
+    // Uses java.util.regex as a standard for regex processing
+    // in Calcite instead of RE2 used by BigQuery/GoogleSQL
+    Pattern regexp;
+    String methodName = "REGEXP_EXTRACT_ALL";

Review Comment:
   @tanclary I've rebased the changes from previous PRs and also consolidated a 
few of the redundant error checks into helper methods. It would be good if you 
could give it a review!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@calcite.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to