loftiest commented on code in PR #56498:
URL: https://github.com/apache/spark/pull/56498#discussion_r3421780781
##########
common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java:
##########
@@ -809,6 +809,74 @@ private static int lowercaseIndexOfSlow(final UTF8String
target, final UTF8Strin
return lowercaseFind(target, lowerCaseCodePoints(pattern), start);
}
+ /**
+ * Returns the position of the {@code occurrence}-th occurrence of the pattern string in the
+ * target string, starting from the specified position (1-based index referring to character
+ * position in UTF8String), with respect to the UTF8_LCASE collation. If the pattern is not
+ * found, {@code MATCH_NOT_FOUND} is returned.
+ *
+ * @param target the string to be searched in
+ * @param pattern the string to be searched for
+ * @param start the start position for searching (1-based, can be negative for backward search)
+ * @param occurrence which occurrence to return (must be >= 1)
+ * @return the position of the {@code occurrence}-th occurrence of pattern in target
+ * (0-based character index), or {@code MATCH_NOT_FOUND} if not found
+ */
+ public static int lowercaseIndexOf(final UTF8String target, final UTF8String
pattern,
+ final int start, final int occurrence) {
+ assert occurrence > 0;
+ if (pattern.numBytes() == 0) return target.indexOfEmpty(start);
+ if (start == 0) return MATCH_NOT_FOUND;
+ if (target.isFullAscii() && pattern.isFullAscii()) {
+ return target.toLowerCase().indexOf(pattern.toLowerCase(), start,
occurrence);
Review Comment:
Applied, thanks!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]