uros-db commented on code in PR #47372:
URL: https://github.com/apache/spark/pull/47372#discussion_r1700341445


##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala:
##########
@@ -2295,6 +2295,827 @@ class CollationSQLExpressionsSuite
     assert(typeException.getErrorClass === 
"DATATYPE_MISMATCH.UNEXPECTED_STATIC_METHOD")
   }
 
+  test("min_by supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT min_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c', 
20) AS tab(x, y);"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row("a")
+        )
+      )
+      // check result row data type
+      val dataType = StringType(collation)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("max_by supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT max_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c', 
20) AS tab(x, y);"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row("b")
+        )
+      )
+      // check result row data type
+      val dataType = StringType(collation)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array('a', 'b', 'c');"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(Seq("a", "b", "c"))
+        )
+      )
+      // check result row data type
+      val dataType = ArrayType(StringType(collation), false)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_agg supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_agg(col) FROM VALUES ('a'), ('b'), ('c') AS 
tab(col);"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(Seq("a", "b", "c"))
+        )
+      )
+      // check result row data type
+      val dataType = ArrayType(StringType(collation), false)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_contains supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_contains(array('a', 'b', 'c'), 'b');"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(true)
+        )
+      )
+      // check result row data type
+      val dataType = BooleanType
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("arrays_overlap supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT arrays_overlap(array('a', 'b', 'c'), array('c', 'd', 
'e'));"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(true)
+        )
+      )
+      // check result row data type
+      val dataType = BooleanType
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_insert supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_insert(array('a', 'b', 'c', 'd'), 5, 'e');"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(Seq("a", "b", "c", "d", "e"))
+        )
+      )
+      // check result row data type
+      val dataType = ArrayType(StringType(collation), true)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_intersect supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_intersect(array('a', 'b', 'c'), array('b', 'c', 
'd'));"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(Seq("b", "c"))
+        )
+      )
+      // check result row data type
+      val dataType = ArrayType(StringType(collation), false)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_join supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_join(array('hello', 'world'), ' ');"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row("hello world")
+        )
+      )
+      // check result row data type
+      val dataType = StringType(collation)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_position supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_position(array('a', 'b', 'c', 'c'), 'c');"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(3)
+        )
+      )
+      // check result row data type
+      val dataType = LongType
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_size supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_size(array('a', 'b', 'c', 'c'));"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(4)
+        )
+      )
+      // check result row data type
+      val dataType = IntegerType
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_sort supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_sort(array('b', null, 'a'));"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(Seq("a", "b", null))
+        )
+      )
+      // check result row data type
+      val dataType = ArrayType(StringType(collation), true)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_except supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_except(array('a', 'b', 'c'), array('c', 'd', 
'e'));"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(Seq("a", "b"))
+        )
+      )
+      // check result row data type
+      val dataType = ArrayType(StringType(collation), false)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_union supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_union(array('a', 'b', 'c'), array('a', 'c', 
'd'));"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(Seq("a", "b", "c", "d"))
+        )
+      )
+      // check result row data type
+      val dataType = ArrayType(StringType(collation), false)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_compact supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_compact(array('a', 'b', null, 'c'));"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(Seq("a", "b", "c"))
+        )
+      )
+      // check result row data type
+      val dataType = ArrayType(StringType(collation), true)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("arrays_zip supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT arrays_zip(array('a', 'b', 'c'), array(1, 2, 3));"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row(Seq(Row("a", 1), Row("b", 2), Row("c", 3)))
+        )
+      )
+      // check result row data type
+      val dataType = ArrayType(StructType(
+        StructField("0", StringType(collation), true) ::
+          StructField("1", IntegerType, true) :: Nil
+      ), false)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_min supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_min(array('a', 'b', null, 'c'));"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row("a")
+        )
+      )
+      // check result row data type
+      val dataType = StringType(collation)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_max supports collation") {
+    val collation = "UNICODE"
+    val query = s"SELECT array_max(array('a', 'b', null, 'c'));"
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+      checkAnswer(
+        sql(query),
+        Seq(
+          Row("c")
+        )
+      )
+      // check result row data type
+      val dataType = StringType(collation)
+      assert(sql(query).schema.head.dataType == dataType)
+    }
+  }
+
+  test("array_append supports collation") {

Review Comment:
   conclusion: we should enforce that all elements in an array have the same 
type (collation included); otherwise, fail the query @mihailom-db



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to