This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3bdf146bbee5 [SPARK-49611][SQL][FOLLOW-UP] Fix wrong results of
collations() TVF
3bdf146bbee5 is described below
commit 3bdf146bbee58d207afaadc92024d9f6c4b941dd
Author: Mihailo Milosevic <[email protected]>
AuthorDate: Thu Sep 19 09:27:38 2024 +0200
[SPARK-49611][SQL][FOLLOW-UP] Fix wrong results of collations() TVF
### What changes were proposed in this pull request?
Fix the results of the accent-sensitivity and case-sensitivity columns.
### Why are the changes needed?
When the initial PR was introduced, the ICU collation listing ended up
generating these columns in a different order, so the results were wrong.
### Does this PR introduce _any_ user-facing change?
No, as Spark 4.0 has not been released yet.
### How was this patch tested?
By the existing test in CollationSuite.scala, whose expected results were wrong in the first place and are corrected here.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #48152 from mihailom-db/tvf-collations-followup.
Authored-by: Mihailo Milosevic <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../spark/sql/catalyst/util/CollationFactory.java | 4 ++--
.../org/apache/spark/sql/CollationSuite.scala | 24 +++++++++++-----------
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git
a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
index 4b88e15e8ed7..87558971042e 100644
---
a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
+++
b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
@@ -773,8 +773,8 @@ public final class CollationFactory {
ICULocaleMap.get(locale).getDisplayCountry(),
VersionInfo.ICU_VERSION.toString(),
COLLATION_PAD_ATTRIBUTE,
- caseSensitivity == CaseSensitivity.CS,
- accentSensitivity == AccentSensitivity.AS);
+ accentSensitivity == AccentSensitivity.AS,
+ caseSensitivity == CaseSensitivity.CS);
}
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
index d5d18b1ab081..73fd897e91f5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
@@ -1661,17 +1661,17 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
Row("SYSTEM", "BUILTIN", "UNICODE", "", "",
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "UNICODE_AI", "", "",
- "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
- Row("SYSTEM", "BUILTIN", "UNICODE_CI", "", "",
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "UNICODE_CI", "", "",
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "UNICODE_CI_AI", "", "",
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "af", "Afrikaans", "",
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "af_AI", "Afrikaans", "",
- "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
- Row("SYSTEM", "BUILTIN", "af_CI", "Afrikaans", "",
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "af_CI", "Afrikaans", "",
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "af_CI_AI", "Afrikaans", "",
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))
@@ -1683,9 +1683,9 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
Seq(Row("SYSTEM", "BUILTIN", "zh_Hant_HKG", "Chinese", "Hong Kong SAR
China",
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_AI", "Chinese", "Hong Kong SAR
China",
- "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
- Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI", "Chinese", "Hong Kong SAR
China",
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI", "Chinese", "Hong Kong SAR
China",
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI_AI", "Chinese", "Hong Kong
SAR China",
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))
@@ -1693,9 +1693,9 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
Seq(Row("SYSTEM", "BUILTIN", "zh_Hans_SGP", "Chinese", "Singapore",
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "zh_Hans_SGP_AI", "Chinese", "Singapore",
- "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
- Row("SYSTEM", "BUILTIN", "zh_Hans_SGP_CI", "Chinese", "Singapore",
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "zh_Hans_SGP_CI", "Chinese", "Singapore",
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "zh_Hans_SGP_CI_AI", "Chinese", "Singapore",
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))
@@ -1704,17 +1704,17 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
Seq(Row("SYSTEM", "BUILTIN", "en_USA", "English", "United States",
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "en_USA_AI", "English", "United States",
- "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
- Row("SYSTEM", "BUILTIN", "en_USA_CI", "English", "United States",
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
+ Row("SYSTEM", "BUILTIN", "en_USA_CI", "English", "United States",
+ "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "en_USA_CI_AI", "English", "United States",
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))
checkAnswer(sql("SELECT NAME, LANGUAGE, ACCENT_SENSITIVITY,
CASE_SENSITIVITY " +
"FROM collations() WHERE COUNTRY = 'United States'"),
Seq(Row("en_USA", "English", "ACCENT_SENSITIVE", "CASE_SENSITIVE"),
- Row("en_USA_AI", "English", "ACCENT_SENSITIVE", "CASE_INSENSITIVE"),
- Row("en_USA_CI", "English", "ACCENT_INSENSITIVE", "CASE_SENSITIVE"),
+ Row("en_USA_AI", "English", "ACCENT_INSENSITIVE", "CASE_SENSITIVE"),
+ Row("en_USA_CI", "English", "ACCENT_SENSITIVE", "CASE_INSENSITIVE"),
Row("en_USA_CI_AI", "English", "ACCENT_INSENSITIVE",
"CASE_INSENSITIVE")))
checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]