uros-db commented on code in PR #45933:
URL: https://github.com/apache/spark/pull/45933#discussion_r1567177666
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala:
##########
@@ -161,6 +161,78 @@ class CollationStringExpressionsSuite
})
}
+ test("Ascii & UnBase64 string expressions with collation") {
+ case class AsciiUnBase64TestCase[R](q: String, dt: DataType, r: R)
+ val testCases = Seq(
+ AsciiUnBase64TestCase("select ascii('a' collate utf8_binary)",
IntegerType, Row(97)),
+ AsciiUnBase64TestCase("select ascii('a' collate utf8_binary_lcase)",
IntegerType, Row(97)),
+ AsciiUnBase64TestCase("select unbase64('YQ==' collate utf8_binary)",
BinaryType,
+ Row(Seq(97))),
+ AsciiUnBase64TestCase("select unbase64('YQ==' collate
utf8_binary_lcase)", BinaryType,
+ Row(Seq(97)))
+ )
+ testCases.foreach(t => {
+ // Result & data type
+ checkAnswer(sql(t.q), t.r)
+ assert(sql(t.q).schema.fields.head.dataType.sameType(t.dt))
+ })
+ }
+
+ test("Chr, Base64, Decode & FormatNumber string expressions with collation")
{
+ case class DefaultCollationTestCase[R](q: String, c: String, r: R)
+ val testCases = Seq(
+ DefaultCollationTestCase("select chr(97)", "UTF8_BINARY", Row("a")),
+ DefaultCollationTestCase("select chr(97)", "UTF8_BINARY_LCASE",
Row("a")),
+ DefaultCollationTestCase("select base64('a')", "UTF8_BINARY",
Row("YQ==")),
+ DefaultCollationTestCase("select base64('a')", "UTF8_BINARY_LCASE",
Row("YQ==")),
+ DefaultCollationTestCase("select decode(encode('a', 'utf-8'), 'utf-8')",
"UTF8_BINARY",
+ Row("a")),
+ DefaultCollationTestCase("select decode(encode('a', 'utf-8'), 'utf-8')",
+ "UTF8_BINARY_LCASE", Row("a")),
+ DefaultCollationTestCase("select format_number(123.123, '###.###')",
"UTF8_BINARY",
+ Row("123.123")),
+ DefaultCollationTestCase("select format_number(123.123, '###.###')",
"UTF8_BINARY_LCASE",
+ Row("123.123"))
+ )
+ testCases.foreach(t => {
+ withSQLConf(SQLConf.DEFAULT_COLLATION.key -> t.c) {
+ // Result & data type
+ checkAnswer(sql(t.q), t.r)
+ assert(sql(t.q).schema.fields.head.dataType.sameType(StringType(t.c)))
+ }
+ })
+ }
+
+ test("Encode, ToBinary & Sentences string expressions with collation") {
+ case class EncodeToBinarySentencesTestCase[R](q: String, dt: DataType, r:
R)
+ val testCases = Seq(
+ EncodeToBinarySentencesTestCase("select encode('a' collate utf8_binary,
'utf-8')",
+ BinaryType, Row(Seq(97))),
+ EncodeToBinarySentencesTestCase("select encode('a' collate
utf8_binary_lcase, 'utf-8')",
+ BinaryType, Row(Seq(97))),
+ EncodeToBinarySentencesTestCase("select to_binary('a' collate
utf8_binary, 'utf-8')",
+ BinaryType, Row(Seq(97))),
+ EncodeToBinarySentencesTestCase("select to_binary('a' collate
utf8_binary_lcase, 'utf-8')",
Review Comment:
Instead of testing `utf8_binary` & `utf8_binary_lcase` again, let's add some
examples with `unicode` & `unicode_ci`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]