Anthrino commented on code in PR #3387: URL: https://github.com/apache/calcite/pull/3387#discussion_r1310898531
########## core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java: ########## @@ -347,19 +347,119 @@ public static String sha512(ByteString string) { } /** SQL {@code REGEXP_CONTAINS(value, regexp)} function. - * Throws a runtime exception for invalid regular expressions.*/ + * Throws a runtime exception for invalid regular expressions. */ public static boolean regexpContains(String value, String regex) { try { // Uses java.util.regex as a standard for regex processing // in Calcite instead of RE2 used by BigQuery/GoogleSQL Pattern regexp = Pattern.compile(regex); return regexp.matcher(value).find(); } catch (PatternSyntaxException ex) { - throw RESOURCE.invalidInputForRegexpContains(ex.getMessage().replace("\r\n", " ") - .replace("\n", " ").replace("\r", " ")).ex(); + throw RESOURCE.invalidRegexInputForRegexpFunctions(ex.getMessage() + .replace(System.lineSeparator(), " "), "REGEXP_CONTAINS").ex(); } } + /** SQL {@code REGEXP_EXTRACT(value, regexp)} function. + * Returns NULL if there is no match. Returns an exception if regex is invalid. + * Uses position=1 and occurrence=1 as default values when not specified. */ + public static @Nullable String regexpExtract(String value, String regex) { + return regexpExtract(value, regex, 1, 1); + } + + /** SQL {@code REGEXP_EXTRACT(value, regexp, position)} function. + * Returns NULL if there is no match, or if position is beyond range. + * Returns an exception if regex or position is invalid. + * Uses occurrence=1 as default value when not specified. */ + public static @Nullable String regexpExtract(String value, String regex, int position) { + return regexpExtract(value, regex, position, 1); + } + + /** SQL {@code REGEXP_EXTRACT(value, regexp, position, occurrence)} function. + * Returns NULL if there is no match, or if position or occurrence are beyond range. + * Returns an exception if regex, position or occurrence are invalid. 
*/ + public static @Nullable String regexpExtract(String value, String regex, int position, + int occurrence) { + // Uses java.util.regex as a standard for regex processing + // in Calcite instead of RE2 used by BigQuery/GoogleSQL + Pattern regexp; + String methodName = "REGEXP_EXTRACT"; + try { + regexp = Pattern.compile(regex); + } catch (PatternSyntaxException ex) { + throw RESOURCE.invalidRegexInputForRegexpFunctions(ex.getMessage() + .replace(System.lineSeparator(), " "), methodName).ex(); + } + + Matcher matcher = regexp.matcher(value); + + if (position <= 0) { + throw RESOURCE.invalidIntegerInputForRegexpFunctions(Integer.toString(position), + "position", methodName).ex(); + } + if (position > value.length()) { + return null; + } + matcher.region(position - 1, value.length()); + + if (occurrence <= 0) { + throw RESOURCE.invalidIntegerInputForRegexpFunctions(Integer.toString(occurrence), + "occurrence", methodName).ex(); + } + + if (matcher.groupCount() > 1) { + throw RESOURCE.multipleCapturingGroupsForRegexpExtract( + Integer.toString(matcher.groupCount()), methodName).ex(); + } + + String match = null; + while (occurrence > 0) { + if (matcher.find()) { + if (matcher.groupCount() == 1) { + match = matcher.group(1); + } else { + match = matcher.group(0); + } + } else { + return null; + } + occurrence--; + } + + return match; + } + + /** SQL {@code REGEXP_EXTRACT_ALL(value, regexp)} function. + * Returns an empty array if there is no match, returns an exception if regex is invalid.*/ + public static List<String> regexpExtractAll(String value, String regex) { + // Uses java.util.regex as a standard for regex processing + // in Calcite instead of RE2 used by BigQuery/GoogleSQL + Pattern regexp; + String methodName = "REGEXP_EXTRACT_ALL"; Review Comment: @tanclary I've rebased the changes from previous PRs and also consolidated a few of the redundant error checks into helper methods. It would be good if you could give it a review!
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@calcite.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org