uros-db commented on code in PR #47372:
URL: https://github.com/apache/spark/pull/47372#discussion_r1700208752
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala:
##########
@@ -2295,6 +2295,827 @@ class CollationSQLExpressionsSuite
assert(typeException.getErrorClass ===
"DATATYPE_MISMATCH.UNEXPECTED_STATIC_METHOD")
}
+ test("min_by supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT min_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c',
20) AS tab(x, y);"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row("a")
+ )
+ )
+ // check result row data type
+ val dataType = StringType(collation)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("max_by supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT max_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c',
20) AS tab(x, y);"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row("b")
+ )
+ )
+ // check result row data type
+ val dataType = StringType(collation)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array('a', 'b', 'c');"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(Seq("a", "b", "c"))
+ )
+ )
+ // check result row data type
+ val dataType = ArrayType(StringType(collation), false)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_agg supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_agg(col) FROM VALUES ('a'), ('b'), ('c') AS
tab(col);"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(Seq("a", "b", "c"))
+ )
+ )
+ // check result row data type
+ val dataType = ArrayType(StringType(collation), false)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_contains supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_contains(array('a', 'b', 'c'), 'b');"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(true)
+ )
+ )
+ // check result row data type
+ val dataType = BooleanType
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("arrays_overlap supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT arrays_overlap(array('a', 'b', 'c'), array('c', 'd',
'e'));"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(true)
+ )
+ )
+ // check result row data type
+ val dataType = BooleanType
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_insert supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_insert(array('a', 'b', 'c', 'd'), 5, 'e');"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(Seq("a", "b", "c", "d", "e"))
+ )
+ )
+ // check result row data type
+ val dataType = ArrayType(StringType(collation), true)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_intersect supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_intersect(array('a', 'b', 'c'), array('b', 'c',
'd'));"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(Seq("b", "c"))
+ )
+ )
+ // check result row data type
+ val dataType = ArrayType(StringType(collation), false)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_join supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_join(array('hello', 'world'), ' ');"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row("hello world")
+ )
+ )
+ // check result row data type
+ val dataType = StringType(collation)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_position supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_position(array('a', 'b', 'c', 'c'), 'c');"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(3)
+ )
+ )
+ // check result row data type
+ val dataType = LongType
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_size supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_size(array('a', 'b', 'c', 'c'));"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(4)
+ )
+ )
+ // check result row data type
+ val dataType = IntegerType
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_sort supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_sort(array('b', null, 'a'));"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(Seq("a", "b", null))
+ )
+ )
+ // check result row data type
+ val dataType = ArrayType(StringType(collation), true)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_except supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_except(array('a', 'b', 'c'), array('c', 'd',
'e'));"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(Seq("a", "b"))
+ )
+ )
+ // check result row data type
+ val dataType = ArrayType(StringType(collation), false)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_union supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_union(array('a', 'b', 'c'), array('a', 'c',
'd'));"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(Seq("a", "b", "c", "d"))
+ )
+ )
+ // check result row data type
+ val dataType = ArrayType(StringType(collation), false)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_compact supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_compact(array('a', 'b', null, 'c'));"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(Seq("a", "b", "c"))
+ )
+ )
+ // check result row data type
+ val dataType = ArrayType(StringType(collation), true)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("arrays_zip supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT arrays_zip(array('a', 'b', 'c'), array(1, 2, 3));"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row(Seq(Row("a", 1), Row("b", 2), Row("c", 3)))
+ )
+ )
+ // check result row data type
+ val dataType = ArrayType(StructType(
+ StructField("0", StringType(collation), true) ::
+ StructField("1", IntegerType, true) :: Nil
+ ), false)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_min supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_min(array('a', 'b', null, 'c'));"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row("a")
+ )
+ )
+ // check result row data type
+ val dataType = StringType(collation)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_max supports collation") {
+ val collation = "UNICODE"
+ val query = s"SELECT array_max(array('a', 'b', null, 'c'));"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkAnswer(
+ sql(query),
+ Seq(
+ Row("c")
+ )
+ )
+ // check result row data type
+ val dataType = StringType(collation)
+ assert(sql(query).schema.head.dataType == dataType)
+ }
+ }
+
+ test("array_append supports collation") {
Review Comment:
IIUC, arrays in Spark can only hold elements of a single data type (i.e.
the element dataType must be uniform within an ArrayType).
With this in mind, we should probably look into why an expression such as
`array('a', 'b' COLLATE UTF8_LCASE)` is allowed in the first place. @mihailom-db and
@stevomitric, let's discuss offline and follow up ASAP.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]