dbatomic commented on code in PR #45216:
URL: https://github.com/apache/spark/pull/45216#discussion_r1498991374
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -174,4 +174,291 @@ class CollationSuite extends QueryTest with SharedSparkSession {
Row(expected))
}
}
+
+ test("Support contains string expression with Collation") {
+ // Test 'contains' with different collations
+ var listLeft: List[String] = List()
+ var listRight: List[String] = List()
+ var listResult: List[Boolean] = List()
+
+ // UCS_BASIC (default) & UNICODE collation
+ listLeft = List("", "c", "abc", "cde", "abde", "abcde", "C", "ABC", "CDE",
"ABDE", "ABCDE")
+ listRight = List("", "c", "abc", "cde", "abde", "abcde", "C", "ABC",
"CDE", "ABDE", "ABCDE")
+ listResult = List(
+ // "" c abc cde abde abcde C ABC CDE ABDE
ABCDE
+ true, false, false, false, false, false, false, false, false, false,
false, // ""
+ true, true, false, false, false, false, false, false, false, false,
false, // c
+ true, true, true, false, false, false, false, false, false, false,
false, // abc
+ true, true, false, true, false, false, false, false, false, false,
false, // cde
+ true, false, false, false, true, false, false, false, false, false,
false, // abde
+ true, true, true, true, false, true, false, false, false, false, false,
// abcde
+ true, false, false, false, false, false, true, false, false, false,
false, // C
+ true, false, false, false, false, false, true, true, false, false,
false, // ABC
+ true, false, false, false, false, false, true, false, true, false,
false, // CDE
+ true, false, false, false, false, false, false, false, false, true,
false, // ABDE
+ true, false, false, false, false, false, true, true, true, false, true)
// ABCDE
+ for {
+ (left, index_left) <- listLeft.zipWithIndex
+ (right, index_right) <- listRight.zipWithIndex
+ } {
+ val expectedAnswer = listResult(index_left * listRight.length +
index_right)
+ // UCS_BASIC (default)
+ checkAnswer(sql("SELECT contains('" + left + "', '" + right + "')"),
Row(expectedAnswer))
+ // UCS_BASIC
+ checkAnswer(sql("SELECT contains('" + left + "', collate('" +
+ right + "', 'UCS_BASIC'))"), Row(expectedAnswer))
+ checkAnswer(sql("SELECT contains(collate('" + left + "', 'UCS_BASIC'),
collate('" +
+ right + "', 'UCS_BASIC'))"), Row(expectedAnswer))
+ // UNICODE
+ checkAnswer(sql("SELECT contains('" + left + "', collate('" +
+ right + "', 'UNICODE'))"), Row(expectedAnswer))
+ checkAnswer(sql("SELECT contains(collate('" + left + "', 'UNICODE'),
collate('" +
+ right + "', 'UNICODE'))"), Row(expectedAnswer))
+ }
+
+
+ // UCS_BASIC_LCASE & UNICODE_CI collation
+ listResult = List(
+ // "" c abc cde abde abcde C ABC CDE ABDE
ABCDE
+ true, false, false, false, false, false, false, false, false, false,
false, // ""
+ true, true, false, false, false, false, true, false, false, false,
false, // c
+ true, true, true, false, false, false, true, true, false, false, false,
// abc
+ true, true, false, true, false, false, true, false, true, false, false,
// cde
+ true, false, false, false, true, false, false, false, false, true,
false, // abde
+ true, true, true, true, false, true, true, true, true, false, true,
// abcde
+ true, true, false, false, false, false, true, false, false, false,
false, // C
+ true, true, true, false, false, false, true, true, false, false, false,
// ABC
+ true, true, false, true, false, false, true, false, true, false, false,
// CDE
+ true, false, false, false, true, false, false, false, false, true,
false, // ABDE
+ true, true, true, true, false, true, true, true, true, false, true)
// ABCDE
+ for {
+ (left, index_left) <- listLeft.zipWithIndex
+ (right, index_right) <- listRight.zipWithIndex
+ } {
+ val expectedAnswer = listResult(index_left * listRight.length +
index_right)
+ // UCS_BASIC_LCASE
+ checkAnswer(sql("SELECT contains('" + left + "', collate('" +
+ right + "', 'UCS_BASIC_LCASE'))"), Row(expectedAnswer))
+ checkAnswer(sql("SELECT contains(collate('" + left + "',
'UCS_BASIC_LCASE'), collate('" +
+ right + "', 'UCS_BASIC_LCASE'))"), Row(expectedAnswer))
+ // UNICODE_CI
+ checkAnswer(sql("SELECT contains('" + left + "', collate('" +
+ right + "', 'UNICODE_CI'))"), Row(expectedAnswer))
+ checkAnswer(sql("SELECT contains(collate('" + left + "', 'UNICODE_CI'),
collate('" +
+ right + "', 'UNICODE_CI'))"), Row(expectedAnswer))
+ }
+ }
+
+ test("Support startsWith string expression with Collation") {
+ // Test 'startsWith' with different collations
+ var listLeft: List[String] = List()
+ var listRight: List[String] = List()
+ var listResult: List[Boolean] = List()
+
+ // UCS_BASIC (default) & UNICODE collation
+ listLeft = List("", "c", "abc", "cde", "abde", "abcde", "C", "ABC", "CDE",
"ABDE", "ABCDE")
+ listRight = List("", "c", "abc", "cde", "abde", "abcde", "C", "ABC",
"CDE", "ABDE", "ABCDE")
+ listResult = List(
+ // "" c abc cde abde abcde C ABC CDE ABDE
ABCDE
+ true, false, false, false, false, false, false, false, false, false,
false, // ""
+ true, true, false, false, false, false, false, false, false, false,
false, // c
+ true, false, true, false, false, false, false, false, false, false,
false, // abc
+ true, true, false, true, false, false, false, false, false, false,
false, // cde
+ true, false, false, false, true, false, false, false, false, false,
false, // abde
+ true, false, true, false, false, true, false, false, false, false,
false, // abcde
+ true, false, false, false, false, false, true, false, false, false,
false, // C
+ true, false, false, false, false, false, false, true, false, false,
false, // ABC
+ true, false, false, false, false, false, true, false, true, false,
false, // CDE
+ true, false, false, false, false, false, false, false, false, true,
false, // ABDE
+ true, false, false, false, false, false, false, true, false, false,
true) // ABCDE
+ for {
+ (left, index_left) <- listLeft.zipWithIndex
+ (right, index_right) <- listRight.zipWithIndex
+ } {
+ val expectedAnswer = listResult(index_left * listRight.length +
index_right)
+ // UCS_BASIC (default)
+ checkAnswer(sql("SELECT startswith('" + left + "', '" + right + "')"),
Row(expectedAnswer))
+ // UCS_BASIC
+ checkAnswer(sql("SELECT startswith('" + left + "', collate('" +
+ right + "', 'UCS_BASIC'))"), Row(expectedAnswer))
+ checkAnswer(sql("SELECT startswith(collate('" + left + "', 'UCS_BASIC'),
collate('" +
+ right + "', 'UCS_BASIC'))"), Row(expectedAnswer))
+ // UNICODE
+ checkAnswer(sql("SELECT startswith('" + left + "', collate('" +
+ right + "', 'UNICODE'))"), Row(expectedAnswer))
+ checkAnswer(sql("SELECT startswith(collate('" + left + "', 'UNICODE'),
collate('" +
+ right + "', 'UNICODE'))"), Row(expectedAnswer))
+ }
+
+ // UCS_BASIC_LCASE & UNICODE_CI collation
+ listResult = List(
+ // "" c abc cde abde abcde C ABC CDE ABDE
ABCDE
+ true, false, false, false, false, false, false, false, false, false,
false, // ""
+ true, true, false, false, false, false, true, false, false, false,
false, // c
+ true, false, true, false, false, false, false, true, false, false,
false, // abc
+ true, true, false, true, false, false, true, false, true, false, false,
// cde
+ true, false, false, false, true, false, false, false, false, true,
false, // abde
+ true, false, true, false, false, true, false, true, false, false, true,
// abcde
+ true, true, false, false, false, false, true, false, false, false,
false, // C
+ true, false, true, false, false, false, false, true, false, false,
false, // ABC
+ true, true, false, true, false, false, true, false, true, false, false,
// CDE
+ true, false, false, false, true, false, false, false, false, true,
false, // ABDE
+ true, false, true, false, false, true, false, true, false, false, true)
// ABCDE
+ for {
+ (left, index_left) <- listLeft.zipWithIndex
+ (right, index_right) <- listRight.zipWithIndex
+ } {
+ val expectedAnswer = listResult(index_left * listRight.length +
index_right)
+ // UCS_BASIC_LCASE
+ checkAnswer(sql("SELECT startswith('" + left + "', collate('" +
+ right + "', 'UCS_BASIC_LCASE'))"), Row(expectedAnswer))
+ checkAnswer(sql("SELECT startswith(collate('" + left + "',
'UCS_BASIC_LCASE'), collate('" +
+ right + "', 'UCS_BASIC_LCASE'))"), Row(expectedAnswer))
+ // UNICODE_CI
+ checkAnswer(sql("SELECT startswith('" + left + "', collate('" +
+ right + "', 'UNICODE_CI'))"), Row(expectedAnswer))
+ checkAnswer(sql("SELECT startswith(collate('" + left + "',
'UNICODE_CI'), collate('" +
+ right + "', 'UNICODE_CI'))"), Row(expectedAnswer))
+ }
+
+ // Serbian language collation tests
Review Comment:
I guess this can be removed :)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]