Anthrino commented on code in PR #3369:
URL: https://github.com/apache/calcite/pull/3369#discussion_r1300724825
##########
core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java:
##########
@@ -355,19 +355,88 @@ public static String sha512(ByteString string) {
}
/** SQL {@code REGEXP_CONTAINS(value, regexp)} function.
- * Throws a runtime exception for invalid regular expressions.*/
+ * Throws a runtime exception for invalid regular expressions. */
public static boolean regexpContains(String value, String regex) {
try {
// Uses java.util.regex as a standard for regex processing
// in Calcite instead of RE2 used by BigQuery/GoogleSQL
Pattern regexp = Pattern.compile(regex);
return regexp.matcher(value).find();
} catch (PatternSyntaxException ex) {
- throw
RESOURCE.invalidInputForRegexpContains(ex.getMessage().replace("\r\n", " ")
- .replace("\n", " ").replace("\r", " ")).ex();
+ throw RESOURCE.invalidRegexInputForRegexpFunctions(ex.getMessage()
+ .replace(System.lineSeparator(), " "), "REGEXP_CONTAINS").ex();
}
}
+ /** SQL {@code REGEXP_EXTRACT(value, regexp)} function.
+ * Returns NULL if there is no match. Returns an exception if regex is
invalid.
+ * Uses position=1 and occurrence=1 as default values when not specified. */
+ public static @Nullable String regexpExtract(String value, String regex) {
+ return regexpExtract(value, regex, 1, 1);
+ }
+
+ /** SQL {@code REGEXP_EXTRACT(value, regexp, position)} function.
+ * Returns NULL if there is no match, or if position is beyond range.
+ * Returns an exception if regex or position is invalid.
+ * Uses occurrence=1 as default value when not specified. */
+ public static @Nullable String regexpExtract(String value, String regex, int
position) {
+ return regexpExtract(value, regex, position, 1);
+ }
+
+ /** SQL {@code REGEXP_EXTRACT(value, regexp, position, occurrence)} function.
+ * Returns NULL if there is no match, or if position or occurrence are
beyond range.
+ * Returns an exception if regex, position or occurrence are invalid. */
+ public static @Nullable String regexpExtract(String value, String regex, int
position,
+ int occurrence) {
+ // Uses java.util.regex as a standard for regex processing
+ // in Calcite instead of RE2 used by BigQuery/GoogleSQL
+ Pattern regexp;
+ String methodName = "REGEXP_EXTRACT";
+ try {
+ regexp = Pattern.compile(regex);
+ } catch (PatternSyntaxException ex) {
+ throw RESOURCE.invalidRegexInputForRegexpFunctions(ex.getMessage()
+ .replace(System.lineSeparator(), " "), methodName).ex();
+ }
+
+ Matcher matcher = regexp.matcher(value);
+
+ if (position <= 0) {
+ throw
RESOURCE.invalidIntegerInputForRegexpFunctions(Integer.toString(position),
+ "position", methodName).ex();
+ }
+ if (position > value.length()) {
+ return null;
+ }
+ matcher.region(position - 1, value.length());
+
+ if (occurrence <= 0) {
Review Comment:
Good point, Tanner. Technically, the matcher is only initialized with the base
string and regex at the top, and the pattern matching only happens when we call
the find() method on line 417. Still, I also think it's better to handle the
exceptions first and then initialize the matcher. I had tested the order of the
errors; I will reorganize them so that the exception handling is done first.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]