miland-db commented on code in PR #45643:
URL: https://github.com/apache/spark/pull/45643#discussion_r1537422910
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala:
##########
@@ -70,6 +74,152 @@ class CollationStringExpressionsSuite extends QueryTest
with SharedSparkSession
})
}
+ test("INSTR check result on non-explicit default collation") {
+ checkEvaluation(StringInstr(Literal("aAads"), Literal("Aa")), 2)
+ }
+
+ test("INSTR check result on explicitly collated strings") {
+ // UTF8_BINARY_LCASE
+ checkEvaluation(StringInstr(Literal.create("aaads", StringType(1)),
+ Literal.create("Aa", StringType(1))), 1)
+ checkEvaluation(StringInstr(Collate(Literal("aaads"), "UTF8_BINARY_LCASE"),
+ Collate(Literal("Aa"), "UTF8_BINARY_LCASE")), 1)
+ checkEvaluation(StringInstr(Collate(Literal("aaads"), "UTF8_BINARY_LCASE"),
+ Collate(Literal("de"), "UTF8_BINARY_LCASE")), 0)
+ // UNICODE
+ checkEvaluation(StringInstr(Literal.create("aaads", StringType(2)),
+ Literal.create("Aa", StringType(2))), 0)
+ checkEvaluation(StringInstr(Collate(Literal("aaads"), "UNICODE"),
+ Collate(Literal("de"), "UNICODE")), 0)
+ // UNICODE_CI
+ checkEvaluation(StringInstr(Literal.create("aaads", StringType(3)),
+ Literal.create("de", StringType(3))), 0)
+ checkEvaluation(StringInstr(Collate(Literal("aaads"), "UNICODE_CI"),
+ Collate(Literal("AD"), "UNICODE_CI")), 3)
+ }
+
+ test("INSTR fail mismatched collation types") {
+ // UNICODE and UNICODE_CI
+ val expr1 = StringInstr(Collate(Literal("aaads"), "UNICODE"),
+ Collate(Literal("Aa"), "UNICODE_CI"))
+ assert(expr1.checkInputDataTypes() ==
+ DataTypeMismatch(
+ errorSubClass = "COLLATION_MISMATCH",
+ messageParameters = Map(
+ "collationNameLeft" -> "UNICODE",
+ "collationNameRight" -> "UNICODE_CI"
+ )
+ )
+ )
+ // DEFAULT(UTF8_BINARY) and UTF8_BINARY_LCASE
+ val expr2 = StringInstr(Literal("aaads"),
+ Collate(Literal("Aa"), "UTF8_BINARY_LCASE"))
+ assert(expr2.checkInputDataTypes() ==
+ DataTypeMismatch(
+ errorSubClass = "COLLATION_MISMATCH",
+ messageParameters = Map(
+ "collationNameLeft" -> "UTF8_BINARY",
+ "collationNameRight" -> "UTF8_BINARY_LCASE"
+ )
+ )
+ )
+ // UTF8_BINARY_LCASE and UNICODE_CI
+ val expr3 = StringInstr(Collate(Literal("aaads"), "UTF8_BINARY_LCASE"),
+ Collate(Literal("Aa"), "UNICODE_CI"))
+ assert(expr3.checkInputDataTypes() ==
+ DataTypeMismatch(
+ errorSubClass = "COLLATION_MISMATCH",
+ messageParameters = Map(
+ "collationNameLeft" -> "UTF8_BINARY_LCASE",
+ "collationNameRight" -> "UNICODE_CI"
+ )
+ )
+ )
+ }
+
+ test("FIND_IN_SET check result on non-explicit default collation") {
+ checkEvaluation(FindInSet(Literal("def"), Literal("abc,b,ab,c,def")), 5)
Review Comment:
Same as above: I wanted to be sure that I hadn't broken anything that
already worked. I will remove it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]