dbatomic commented on code in PR #45216:
URL: https://github.com/apache/spark/pull/45216#discussion_r1509083011
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -183,6 +183,266 @@ class CollationSuite extends DatasourceV2SQLBase {
}
}
+ test("checkCollation throws exception for incompatible collationIds") {
+ val left: String = "abc" // collate with 'UNICODE_CI'
+ val leftCollationName: String = "UNICODE_CI";
+ var right: String = null // collate with 'UNICODE'
+ val rightCollationName: String = "UNICODE";
+ // contains
+ right = left.substring(1, 2);
+ checkError(
+ exception = intercept[SparkException] {
+ spark.sql(s"SELECT contains(collate('$left', '$leftCollationName')," +
+ s"collate('$right', '$rightCollationName'))").collect()
+ },
+ errorClass = "COLLATION_MISMATCH",
+ sqlState = "42K09",
+ parameters = Map(
+ "collationNameLeft" -> s"$leftCollationName",
+ "collationNameRight" -> s"$rightCollationName"
+ )
+ )
+ // startsWith
+ right = left.substring(0, 1);
+ checkError(
+ exception = intercept[SparkException] {
+ spark.sql(s"SELECT startsWith(collate('$left', '$leftCollationName'),"
+
+ s"collate('$right', '$rightCollationName'))").collect()
+ },
+ errorClass = "COLLATION_MISMATCH",
+ sqlState = "42K09",
+ parameters = Map(
+ "collationNameLeft" -> s"$leftCollationName",
+ "collationNameRight" -> s"$rightCollationName"
+ )
+ )
+ // endsWith
+ right = left.substring(2, 3);
+ checkError(
+ exception = intercept[SparkException] {
+ spark.sql(s"SELECT endsWith(collate('$left', '$leftCollationName')," +
+ s"collate('$right', '$rightCollationName'))").collect()
+ },
+ errorClass = "COLLATION_MISMATCH",
+ sqlState = "42K09",
+ parameters = Map(
+ "collationNameLeft" -> s"$leftCollationName",
+ "collationNameRight" -> s"$rightCollationName"
+ )
+ )
+ }
+
+ test("Support contains string expression with Collation") {
+ // Test 'contains' with different collations
+ var listLeft: List[String] = List()
+ var listRight: List[String] = List()
+ var listResult: List[Boolean] = List()
+
+ // UCS_BASIC (default) & UNICODE collation
+ listLeft = List("", "c", "abc", "cde", "abde", "abcde", "C", "ABC", "CDE",
"ABDE", "ABCDE")
+ listRight = List("", "c", "abc", "cde", "abde", "abcde", "C", "ABC",
"CDE", "ABDE", "ABCDE")
+ listResult = List(
+ // "" c abc cde abde abcde C ABC CDE ABDE
ABCDE
+ true, false, false, false, false, false, false, false, false, false,
false, // ""
+ true, true, false, false, false, false, false, false, false, false,
false, // c
+ true, true, true, false, false, false, false, false, false, false,
false, // abc
+ true, true, false, true, false, false, false, false, false, false,
false, // cde
+ true, false, false, false, true, false, false, false, false, false,
false, // abde
+ true, true, true, true, false, true, false, false, false, false, false,
// abcde
+ true, false, false, false, false, false, true, false, false, false,
false, // C
+ true, false, false, false, false, false, true, true, false, false,
false, // ABC
+ true, false, false, false, false, false, true, false, true, false,
false, // CDE
+ true, false, false, false, false, false, false, false, false, true,
false, // ABDE
+ true, false, false, false, false, false, true, true, true, false, true)
// ABCDE
Review Comment:
Can we use case-class-based checks, similar to what we do in
`CollationFactorySuite.scala`?
```scala
case class CollationTestCase[R](collationName: String, s1: String, s2:
String, expectedResult: R)
test("collation aware equality and hash") {
val checks = Seq(
CollationTestCase("UCS_BASIC", "aaa", "aaa", true),
CollationTestCase("UCS_BASIC", "aaa", "AAA", false),
CollationTestCase("UCS_BASIC", "aaa", "bbb", false),
CollationTestCase("UCS_BASIC_LCASE", "aaa", "aaa", true),
CollationTestCase("UCS_BASIC_LCASE", "aaa", "AAA", true),
CollationTestCase("UCS_BASIC_LCASE", "aaa", "AaA", true),
CollationTestCase("UCS_BASIC_LCASE", "aaa", "AaA", true),
CollationTestCase("UCS_BASIC_LCASE", "aaa", "aa", false),
CollationTestCase("UCS_BASIC_LCASE", "aaa", "bbb", false),
CollationTestCase("UNICODE", "aaa", "aaa", true),
CollationTestCase("UNICODE", "aaa", "AAA", false),
CollationTestCase("UNICODE", "aaa", "bbb", false),
CollationTestCase("UNICODE_CI", "aaa", "aaa", true),
CollationTestCase("UNICODE_CI", "aaa", "AAA", true),
CollationTestCase("UNICODE_CI", "aaa", "bbb", false))
```
Also, having so many test cases may indeed be overkill :) I think the
following cases would suffice:
1) an empty string
2/3) a single character, upper and lower case
4/5) a multi-character string, upper and lower case
6) a sequence that is not contained anywhere.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]