This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 8f82d0f3dc17 [SPARK-49663][SQL] Enable RTRIM suggestions in collation expressions
8f82d0f3dc17 is described below

commit 8f82d0f3dc1790ca17816fd7b8fb908b7c84fd90
Author: Jovan Pavlovic <[email protected]>
AuthorDate: Wed Oct 30 11:51:57 2024 +0100

    [SPARK-49663][SQL] Enable RTRIM suggestions in collation expressions
    
    ### What changes were proposed in this pull request?
    Add RTRIM collation suggestions for collation expressions when an invalid collation name is given.
    
    ### Why are the changes needed?
    Better user experience: the collations suggested for an invalid collation name now cover RTRIM collations as well.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. The collation names suggested for an invalid collation name can now include the `_RTRIM` modifier.
    
    ### How was this patch tested?
    Added tests in CollationFactorySuite.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #48545 from jovanpavl-db/rtrim_closest_suggestions.
    
    Authored-by: Jovan Pavlovic <[email protected]>
    Signed-off-by: Max Gekk <[email protected]>
---
 .../spark/sql/catalyst/util/CollationFactory.java  | 19 ++++++---
 .../spark/unsafe/types/CollationFactorySuite.scala | 46 +++++++++++++++-------
 2 files changed, 44 insertions(+), 21 deletions(-)

diff --git 
a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
 
b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
index 3117854a432b..ad5e5ae845f8 100644
--- 
a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
+++ 
b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
@@ -1278,19 +1278,26 @@ public final class CollationFactory {
         Collation.CollationSpecUTF8.UTF8_BINARY_COLLATION.collationName,
         Collation.CollationSpecUTF8.UTF8_LCASE_COLLATION.collationName
       };
-      validModifiers = new String[0];
+      validModifiers = new String[]{"_RTRIM"};
     } else {
       validRootNames = getICULocaleNames();
-      validModifiers = new String[]{"_CI", "_AI", "_CS", "_AS"};
+      validModifiers = new String[]{"_CI", "_AI", "_CS", "_AS", "_RTRIM"};
     }
 
     // Split modifiers and locale name.
-    final int MODIFIER_LENGTH = 3;
+    boolean foundModifier = true;
     String localeName = collationName.toUpperCase();
     List<String> modifiers = new ArrayList<>();
-    while (Arrays.stream(validModifiers).anyMatch(localeName::endsWith)) {
-      modifiers.add(localeName.substring(localeName.length() - 
MODIFIER_LENGTH));
-      localeName = localeName.substring(0, localeName.length() - 
MODIFIER_LENGTH);
+    while (foundModifier) {
+      foundModifier = false;
+      for (String modifier : validModifiers) {
+        if (localeName.endsWith(modifier)) {
+          modifiers.add(modifier);
+          localeName = localeName.substring(0, localeName.length() - 
modifier.length());
+          foundModifier = true;
+          break;
+        }
+      }
     }
 
     // Suggest version with unique modifiers.
diff --git 
a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
 
b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
index 1b16432e6378..0e94073e4773 100644
--- 
a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
+++ 
b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
@@ -100,27 +100,33 @@ class CollationFactorySuite extends AnyFunSuite with 
Matchers { // scalastyle:ig
     Seq(
       ("UTF8_BINARY_CS", "UTF8_BINARY"),
       ("UTF8_BINARY_AS", "UTF8_BINARY"), // this should be UNICODE_AS
-      ("UTF8_BINARY_CS_AS","UTF8_BINARY"), // this should be UNICODE_CS_AS
-      ("UTF8_BINARY_AS_CS","UTF8_BINARY"),
-      ("UTF8_BINARY_CI","UTF8_BINARY"),
-      ("UTF8_BINARY_AI","UTF8_BINARY"),
-      ("UTF8_BINARY_CI_AI","UTF8_BINARY"),
-      ("UTF8_BINARY_AI_CI","UTF8_BINARY"),
-      ("UTF8_BS","UTF8_LCASE"),
-      ("BINARY_UTF8","ar_SAU"),
-      ("UTF8_BINARY_A","UTF8_BINARY"),
-      ("UNICODE_X","UNICODE"),
-      ("UNICODE_CI_X","UNICODE"),
-      ("UNICODE_LCASE_X","UNICODE"),
-      ("UTF8_UNICODE","UTF8_LCASE"),
-      ("UTF8_BINARY_UNICODE","UTF8_BINARY"),
+      ("UTF8_BINARY_CS_AS", "UTF8_BINARY"), // this should be UNICODE_CS_AS
+      ("UTF8_BINARY_AS_CS", "UTF8_BINARY"),
+      ("UTF8_BINARY_CI", "UTF8_BINARY"),
+      ("UTF8_BINARY_AI", "UTF8_BINARY"),
+      ("UTF8_BINARY_CI_AI", "UTF8_BINARY"),
+      ("UTF8_BINARY_AI_CI", "UTF8_BINARY"),
+      ("UTF8_BINARY_AI_RTRIM", "UTF8_BINARY_RTRIM"),
+      ("UTF8_BINARY_CI_RTRIM", "UTF8_BINARY_RTRIM"),
+      ("UTF8_BINARY_AI_CI_RTRIM", "UTF8_BINARY_RTRIM"),
+      ("UTF8_BS", "UTF8_LCASE"),
+      ("BINARY_UTF8", "ar_SAU"),
+      ("UTF8_BINARY_A", "UTF8_BINARY"),
+      ("UNICODE_X", "UNICODE"),
+      ("UNICODE_CI_X", "UNICODE"),
+      ("UNICODE_LCASE_X", "UNICODE"),
+      ("UNICODE_RTRIM_LCASE_X", "UNICODE"),
+      ("UTF8_UNICODE", "UTF8_LCASE"),
+      ("UTF8_BINARY_UNICODE", "UTF8_BINARY"),
       ("CI_UNICODE", "UNICODE"),
       ("LCASE_UNICODE", "UNICODE"),
+      ("RTRIM_UNICODE", "UNICODE"),
       ("UNICODE_UNSPECIFIED", "UNICODE"),
       ("UNICODE_CI_UNSPECIFIED", "UNICODE"),
       ("UNICODE_UNSPECIFIED_CI_UNSPECIFIED", "UNICODE"),
       ("UNICODE_INDETERMINATE", "UNICODE"),
-      ("UNICODE_CI_INDETERMINATE", "UNICODE")
+      ("UNICODE_CI_INDETERMINATE", "UNICODE"),
+      ("UNICODE_RTRIM_INDETERMINATE", "UNICODE")
     ).foreach{case (collationName, proposals) =>
       checkCollationNameError(collationName, proposals)
     }
@@ -372,15 +378,23 @@ class CollationFactorySuite extends AnyFunSuite with 
Matchers { // scalastyle:ig
       ("CI_en", "ceb"),
       ("USA_CI_en", "UNICODE"),
       ("en_CI_USA", "en_USA"),
+      ("en_RTRIM_USA", "en_USA"),
       ("CI_sr_Cyrl_SRB", "sr_Cyrl_SRB"),
+      ("RTRIM_sr_Cyrl_SRB", "sr_Cyrl_SRB"),
       ("sr_CI_Cyrl_SRB", "sr_Cyrl_SRB"),
+      ("sr_RTRIM_Cyrl_SRB", "sr_Cyrl_SRB"),
       ("sr_Cyrl_CI_SRB", "sr_Cyrl_SRB"),
+      ("sr_Cyrl_RTRIM_SRB", "sr_Cyrl_SRB"),
       ("CI_Cyrl_sr", "sr_Cyrl_SRB"),
+      ("RTRIM_Cyrl_sr", "sr_Cyrl_SRB"),
       ("Cyrl_CI_sr", "he_ISR"),
       ("Cyrl_CI_sr_SRB", "sr_Cyrl_SRB"),
+      ("Cyrl_RTRIM_sr_SRB", "sr_Cyrl_SRB"),
       ("Cyrl_sr_CI_SRB", "sr_Cyrl_SRB"),
+      ("Cyrl_sr_RTRIM_SRB", "sr_Cyrl_SRB"),
       // no locale specified
       ("_CI_AI", "af_CI_AI, am_CI_AI, ar_CI_AI"),
+      ("_CI_AI_RTRIM", "af_CI_AI_RTRIM, am_CI_AI_RTRIM, ar_CI_AI_RTRIM"),
       ("", "af, am, ar")
     ).foreach { case (collationName, proposals) =>
       checkCollationNameError(collationName, proposals)
@@ -476,6 +490,7 @@ class CollationFactorySuite extends AnyFunSuite with 
Matchers { // scalastyle:ig
       ("UNICODE_CI_CI", "UNICODE_CI"),
       ("UNICODE_CI_CS", "UNICODE_CS"),
       ("UNICODE_CS_CI", "UNICODE_CS"),
+      ("UNICODE_RTRIM_RTRIM", "UNICODE_RTRIM"),
       ("UNICODE_AS_AS", "UNICODE_AS"),
       ("UNICODE_AI_AI", "UNICODE_AI"),
       ("UNICODE_AS_AI", "UNICODE_AS"),
@@ -485,6 +500,7 @@ class CollationFactorySuite extends AnyFunSuite with 
Matchers { // scalastyle:ig
       ("UNICODE_CS_AS_CI_AI", "UNICODE_CS_AS"),
       ("UNICODE__CS__AS", "UNICODE_AS"),
       ("UNICODE-CS-AS", "UNICODE"),
+      ("UNICODE__CS__RTRIM", "UNICODE_RTRIM"),
       ("UNICODECSAS", "UNICODE"),
       ("_CS_AS_UNICODE", "UNICODE")
     ).foreach { case (collationName, proposals) =>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to