uros-db commented on code in PR #45725:
URL: https://github.com/apache/spark/pull/45725#discussion_r1584432496
##########
common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java:
##########
@@ -441,6 +444,45 @@ public static int execICU(final UTF8String string, final
UTF8String substring, f
}
}
+ public static class SubstringIndex {
+ public static UTF8String exec(final UTF8String string, final UTF8String
delimiter,
+ final int count, final int collationId) {
+ CollationFactory.Collation collation =
CollationFactory.fetchCollation(collationId);
+ if (collation.supportsBinaryEquality) {
+ return execBinary(string, delimiter, count);
+ } else if (collation.supportsLowercaseEquality) {
+ return execLowercase(string, delimiter, count);
+ } else {
+ return execICU(string, delimiter, count, collationId);
+ }
+ }
+ public static String genCode(final String string, final String delimiter,
+ final int count, final int collationId) {
+ CollationFactory.Collation collation =
CollationFactory.fetchCollation(collationId);
+ String expr = "CollationSupport.SubstringIndex.exec";
+ if (collation.supportsBinaryEquality) {
+ return String.format(expr + "Binary(%s, %s, %d)", string, delimiter,
count);
+ } else if (collation.supportsLowercaseEquality) {
+ return String.format(expr + "Lowercase(%s, %s, %d)", string,
delimiter, count);
+ } else {
+ return String.format(expr + "ICU(%s, %s, %d, %d)", string, delimiter,
count, collationId);
+ }
+ }
+ public static UTF8String execBinary(final UTF8String string, final
UTF8String delimiter,
+ final int count) {
+ return string.subStringIndex(delimiter, count);
+ }
+ public static UTF8String execLowercase(final UTF8String string, final
UTF8String delimiter,
+ final int count) {
+ return CollationAwareUTF8String.lowercaseSubStringIndex(string,
delimiter, count);
Review Comment:
Agreed, we should do this in https://github.com/apache/spark/pull/45820.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]