uros-db commented on code in PR #46720:
URL: https://github.com/apache/spark/pull/46720#discussion_r1612993851


##########
common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java:
##########
@@ -208,87 +208,99 @@ public static boolean execICU(final UTF8String l, final 
UTF8String r,
   public static class Upper {
     public static UTF8String exec(final UTF8String v, final int collationId) {
       CollationFactory.Collation collation = 
CollationFactory.fetchCollation(collationId);
-      if (collation.supportsBinaryEquality || 
collation.supportsLowercaseEquality) {
+      if (collation.supportsBinaryEquality) {
         return execUTF8(v);
-      } else {
+      } else if (collation.supportsLowercaseEquality) {
+        return execLowercase(v);
+      }  else {
         return execICU(v, collationId);
       }
     }
     public static String genCode(final String v, final int collationId) {
       CollationFactory.Collation collation = 
CollationFactory.fetchCollation(collationId);
       String expr = "CollationSupport.Upper.exec";
-      if (collation.supportsBinaryEquality || 
collation.supportsLowercaseEquality) {
+      if (collation.supportsBinaryEquality) {
         return String.format(expr + "UTF8(%s)", v);
-      } else {
+      } else if (collation.supportsLowercaseEquality) {
+        return String.format(expr + "Lowercase(%s)", v);
+      }  else {
         return String.format(expr + "ICU(%s, %d)", v, collationId);
       }
     }
     public static UTF8String execUTF8(final UTF8String v) {
       return v.toUpperCase();
     }
+    public static UTF8String execLowercase(final UTF8String v) {
+      return CollationAwareUTF8String.toUpperCase(v);
+    }
     public static UTF8String execICU(final UTF8String v, final int 
collationId) {
-      return 
UTF8String.fromString(CollationAwareUTF8String.toUpperCase(v.toString(), 
collationId));
+      return CollationAwareUTF8String.toUpperCase(v, collationId);
     }
   }
 
   public static class Lower {
     public static UTF8String exec(final UTF8String v, final int collationId) {
       CollationFactory.Collation collation = 
CollationFactory.fetchCollation(collationId);
-      if (collation.supportsBinaryEquality || 
collation.supportsLowercaseEquality) {
+      if (collation.supportsBinaryEquality) {
         return execUTF8(v);
+      } else if (collation.supportsLowercaseEquality) {
+        return execLowercase(v);
       } else {
         return execICU(v, collationId);
       }
     }
     public static String genCode(final String v, final int collationId) {
       CollationFactory.Collation collation = 
CollationFactory.fetchCollation(collationId);
         String expr = "CollationSupport.Lower.exec";
-      if (collation.supportsBinaryEquality || 
collation.supportsLowercaseEquality) {
+      if (collation.supportsBinaryEquality) {
         return String.format(expr + "UTF8(%s)", v);
-      } else {
+      } else if (collation.supportsLowercaseEquality) {
+        return String.format(expr + "Lowercase(%s)", v);
+      }  else {
         return String.format(expr + "ICU(%s, %d)", v, collationId);
       }
     }
     public static UTF8String execUTF8(final UTF8String v) {
       return v.toLowerCase();
     }
+    public static UTF8String execLowercase(final UTF8String v) {
+      return CollationAwareUTF8String.toLowerCase(v);
+    }
     public static UTF8String execICU(final UTF8String v, final int 
collationId) {
-      return 
UTF8String.fromString(CollationAwareUTF8String.toLowerCase(v.toString(), 
collationId));
+      return CollationAwareUTF8String.toLowerCase(v, collationId);
     }
   }
 
   public static class InitCap {
     public static UTF8String exec(final UTF8String v, final int collationId) {
       CollationFactory.Collation collation = 
CollationFactory.fetchCollation(collationId);
-      if (collation.supportsBinaryEquality || 
collation.supportsLowercaseEquality) {
+      if (collation.supportsBinaryEquality) {
         return execUTF8(v);
+      } else if (collation.supportsLowercaseEquality) {
+        return execLowercase(v);
       } else {
         return execICU(v, collationId);
       }
     }
-
     public static String genCode(final String v, final int collationId) {
       CollationFactory.Collation collation = 
CollationFactory.fetchCollation(collationId);
       String expr = "CollationSupport.InitCap.exec";
-      if (collation.supportsBinaryEquality || 
collation.supportsLowercaseEquality) {
+      if (collation.supportsBinaryEquality) {
         return String.format(expr + "UTF8(%s)", v);
+      } else if (collation.supportsLowercaseEquality) {
+        return String.format(expr + "Lowercase(%s)", v);
       } else {
         return String.format(expr + "ICU(%s, %d)", v, collationId);
       }
     }
-
     public static UTF8String execUTF8(final UTF8String v) {
       return v.toLowerCase().toTitleCase();
     }
-

Review Comment:
   This code is now removed, but what you're referring to is the UTF8_BINARY_LCASE
   implementation, i.e. the default `UTF8String` / pre-collation Spark behavior:
   
   ```
   public static UTF8String execUTF8(final UTF8String v) {
     return v.toLowerCase().toTitleCase();
   }
   ```
   
   So essentially, this is how it was done in Spark before collation awareness for
   InitCap (i.e. `public static class InitCap` in `CollationSupport`) was introduced,
   so there are no breaking changes.
   
   Here's the old code for reference (the pre-collation implementation, which calls `UTF8String` methods directly):
   
   ```
     override def nullSafeEval(string: Any): Any = {
       // scalastyle:off caselocale
       string.asInstanceOf[UTF8String].toLowerCase.toTitleCase
       // scalastyle:on caselocale
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to