stefankandic commented on code in PR #47958:
URL: https://github.com/apache/spark/pull/47958#discussion_r1741077424
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -33,7 +35,7 @@ import
org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
import org.apache.spark.sql.execution.aggregate.{HashAggregateExec,
ObjectHashAggregateExec}
import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
import org.apache.spark.sql.execution.joins._
-import org.apache.spark.sql.internal.{SqlApiConf, SQLConf}
+import org.apache.spark.sql.internal.{SQLConf, SqlApiConf}
Review Comment:
unneeded change
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -186,6 +188,50 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
}
}
+ test("check difference betweeen SR_AI and SR_Latn_AI collations") {
+ // scalastyle:off nonascii
+ // SR_AI collation
+ var collationName = "SR_AI"
+ var builder = new ULocale.Builder();
Review Comment:
We should still test Spark's behavior rather than ICU's behavior directly,
even though we use ICU under the hood
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -186,6 +188,50 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
}
}
+ test("check difference betweeen SR_AI and SR_Latn_AI collations") {
+ // scalastyle:off nonascii
+ // SR_AI collation
+ var collationName = "SR_AI"
+ var builder = new ULocale.Builder();
+ builder.setLocale(new ULocale(collationName))
+ builder.setUnicodeLocaleKeyword("ks", "level1")
+ builder.setUnicodeLocaleKeyword("kc", "true")
+ var collator = Collator.getInstance(builder.build())
+
+ assert(collator.compare("cCćĆčČšŠžŽ", "čČcCćĆsSzZ") == 0)
+ checkAnswer(sql(s"SELECT 'cCćĆčČšŠžŽ' = 'čČcCćĆsSzZ' COLLATE
$collationName"), Row(true))
+
+ // SR_Latn_AI collation
+ collationName = "SR_Latn_AI"
+ builder = new ULocale.Builder();
+ builder.setLocale(new ULocale(collationName))
+ builder.setUnicodeLocaleKeyword("ks", "level1")
+ builder.setUnicodeLocaleKeyword("kc", "true")
+ collator = Collator.getInstance(builder.build())
+
+ assert(collator.compare("c", "ć") != 0)
Review Comment:
We can probably put this inside a loop to avoid repeating these calls
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]