dejankrak-db commented on code in PR #48608:
URL: https://github.com/apache/spark/pull/48608#discussion_r1817181507
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala:
##########
@@ -2434,829 +2447,622 @@ class CollationSQLExpressionsSuite
)
}
- test("min_by supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT min_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c',
20) AS tab(x, y);"
+ // common method for subsequent tests verifying various SQL expressions with
collations
+ private def testCollationSqlExpressionCommon(
+ query: String,
+ collation: String,
+ result: Seq[Row],
+ dataType: DataType): Unit = {
withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row("a")
- )
- )
+ // check result correctness
+ checkAnswer(sql(query), result)
// check result row data type
- val dataType = StringType(collation)
assert(sql(query).schema.head.dataType == dataType)
}
}
+ test("min_by supports collation") {
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT min_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c',
20) AS tab(x, y);"
+ val result = Seq(
+ Row("a")
+ )
+ val dataType = StringType(collation)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
+ }
+ }
+
test("max_by supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT max_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c',
20) AS tab(x, y);"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row("b")
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT max_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c',
20) AS tab(x, y);"
+ val result = Seq(
+ Row("b")
)
- // check result row data type
val dataType = StringType(collation)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array('a', 'b', 'c');"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("a", "b", "c"))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array('a', 'b', 'c');"
+ val result = Seq(
+ Row(Seq("a", "b", "c"))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), false)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_agg supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_agg(col) FROM VALUES ('a'), ('b'), ('c') AS
tab(col);"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("a", "b", "c"))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_agg(col) FROM VALUES ('a'), ('b'), ('c') AS
tab(col);"
+ val result = Seq(
+ Row(Seq("a", "b", "c"))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), false)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_contains supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_contains(array('a', 'b', 'c'), 'b');"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(true)
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_contains(array('a', 'b', 'c'), 'b');"
+ val result = Seq(
+ Row(true)
)
- // check result row data type
val dataType = BooleanType
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("arrays_overlap supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT arrays_overlap(array('a', 'b', 'c'), array('c', 'd',
'e'));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(true)
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT arrays_overlap(array('a', 'b', 'c'), array('c', 'd',
'e'));"
+ val result = Seq(
+ Row(true)
)
- // check result row data type
val dataType = BooleanType
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_insert supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_insert(array('a', 'b', 'c', 'd'), 5, 'e');"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("a", "b", "c", "d", "e"))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_insert(array('a', 'b', 'c', 'd'), 5, 'e');"
+ val result = Seq(
+ Row(Seq("a", "b", "c", "d", "e"))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), true)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_intersect supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_intersect(array('a', 'b', 'c'), array('b', 'c',
'd'));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("b", "c"))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_intersect(array('a', 'b', 'c'), array('b',
'c', 'd'));"
+ val result = Seq(
+ Row(Seq("b", "c"))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), false)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_join supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_join(array('hello', 'world'), ' ');"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row("hello world")
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_join(array('hello', 'world'), ' ');"
+ val result = Seq(
+ Row("hello world")
)
- // check result row data type
val dataType = StringType(collation)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_position supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_position(array('a', 'b', 'c', 'c'), 'c');"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(3)
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_position(array('a', 'b', 'c', 'c'), 'c');"
+ val result = Seq(
+ Row(3)
)
- // check result row data type
val dataType = LongType
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_size supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_size(array('a', 'b', 'c', 'c'));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(4)
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_size(array('a', 'b', 'c', 'c'));"
+ val result = Seq(
+ Row(4)
)
- // check result row data type
val dataType = IntegerType
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_sort supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_sort(array('b', null, 'A'));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("A", "b", null))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_sort(array('b', null, 'A'));"
+ val result = Seq(
+ Row(Seq("A", "b", null))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), true)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_except supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_except(array('a', 'b', 'c'), array('c', 'd',
'e'));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("a", "b"))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_except(array('a', 'b', 'c'), array('c', 'd',
'e'));"
+ val result = Seq(
+ Row(Seq("a", "b"))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), false)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_union supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_union(array('a', 'b', 'c'), array('a', 'c',
'd'));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("a", "b", "c", "d"))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_union(array('a', 'b', 'c'), array('a', 'c',
'd'));"
+ val result = Seq(
+ Row(Seq("a", "b", "c", "d"))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), false)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_compact supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_compact(array('a', 'b', null, 'c'));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("a", "b", "c"))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_compact(array('a', 'b', null, 'c'));"
+ val result = Seq(
+ Row(Seq("a", "b", "c"))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), false)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("arrays_zip supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT arrays_zip(array('a', 'b', 'c'), array(1, 2, 3));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq(Row("a", 1), Row("b", 2), Row("c", 3)))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT arrays_zip(array('a', 'b', 'c'), array(1, 2, 3));"
+ val result = Seq(
+ Row(Seq(Row("a", 1), Row("b", 2), Row("c", 3)))
)
- // check result row data type
val dataType = ArrayType(StructType(
StructField("0", StringType(collation), true) ::
StructField("1", IntegerType, true) :: Nil
), false)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_min supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_min(array('a', 'b', null, 'c'));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row("a")
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_min(array('a', 'b', null, 'c'));"
+ val result = Seq(
+ Row("a")
)
- // check result row data type
val dataType = StringType(collation)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_max supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_max(array('a', 'b', null, 'c'));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row("c")
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_max(array('a', 'b', null, 'c'));"
+ val result = Seq(
+ Row("c")
)
- // check result row data type
val dataType = StringType(collation)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_append supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_append(array('b', 'd', 'c', 'a'), 'e');"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("b", "d", "c", "a", "e"))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_append(array('b', 'd', 'c', 'a'), 'e');"
+ val result = Seq(
+ Row(Seq("b", "d", "c", "a", "e"))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), true)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_repeat supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_repeat('abc', 2);"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("abc", "abc"))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_repeat('abc', 2);"
+ val result = Seq(
+ Row(Seq("abc", "abc"))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), false)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_remove supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_remove(array('a', 'b', null, 'c'), 'b');"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("a", null, "c"))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_remove(array('a', 'b', null, 'c'), 'b');"
+ val result = Seq(
+ Row(Seq("a", null, "c"))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), true)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_prepend supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_prepend(array('b', 'd', 'c', 'a'), 'd');"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("d", "b", "d", "c", "a"))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_prepend(array('b', 'd', 'c', 'a'), 'd');"
+ val result = Seq(
+ Row(Seq("d", "b", "d", "c", "a"))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), true)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("array_distinct supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT array_distinct(array('a', 'b', 'c', null, 'c'));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("a", "b", "c", null))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT array_distinct(array('a', 'b', 'c', null, 'c'));"
+ val result = Seq(
+ Row(Seq("a", "b", "c", null))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), true)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("collect_list supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT collect_list(col) FROM VALUES ('a'), ('b'), ('c') AS
tab(col);"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(Seq("a", "b", "c"))
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT collect_list(col) FROM VALUES ('a'), ('b'), ('c') AS
tab(col);"
+ val result = Seq(
+ Row(Seq("a", "b", "c"))
)
- // check result row data type
val dataType = ArrayType(StringType(collation), false)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("collect_set does not support collation") {
- val collation = "UNICODE"
- val query = s"SELECT collect_set(col) FROM VALUES ('a'), ('b'), ('a') AS
tab(col);"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkError(
- exception = intercept[AnalysisException] {
- sql(query)
- },
- condition = "DATATYPE_MISMATCH.UNSUPPORTED_INPUT_TYPE",
- sqlState = Some("42K09"),
- parameters = Map(
- "functionName" -> "`collect_set`",
- "dataType" -> "\"MAP\" or \"COLLATED STRING\"",
- "sqlExpr" -> "\"collect_set(col)\""),
- context = ExpectedContext(
- fragment = "collect_set(col)",
- start = 7,
- stop = 22))
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT collect_set(col) FROM VALUES ('a'), ('b'), ('a') AS
tab(col);"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(query)
+ },
+ condition = "DATATYPE_MISMATCH.UNSUPPORTED_INPUT_TYPE",
+ sqlState = Some("42K09"),
+ parameters = Map(
+ "functionName" -> "`collect_set`",
+ "dataType" -> "\"MAP\" or \"COLLATED STRING\"",
+ "sqlExpr" -> "\"collect_set(col)\""),
+ context = ExpectedContext(
+ fragment = "collect_set(col)",
+ start = 7,
+ stop = 22))
+ }
}
}
test("element_at supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT element_at(array('a', 'b', 'c'), 2);"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row("b")
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT element_at(array('a', 'b', 'c'), 2);"
+ val result = Seq(
+ Row("b")
)
- // check result row data type
val dataType = StringType(collation)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("aggregate supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT aggregate(array('a', 'b', 'c'), '', (acc, x) ->
concat(acc, x));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row("abc")
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT aggregate(array('a', 'b', 'c'), '', (acc, x) ->
concat(acc, x));"
+ val result = Seq(
+ Row("abc")
)
- // check result row data type
val dataType = StringType(collation)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("explode supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT explode(array('a', 'b'));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row("a"),
- Row("b")
- )
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT explode(array('a', 'b'));"
+ val result = Seq(
+ Row("a"),
+ Row("b")
)
- // check result row data type
val dataType = StringType(collation)
- assert(sql(query).schema.head.dataType == dataType)
+ testCollationSqlExpressionCommon(query, collation, result, dataType)
}
}
test("posexplode supports collation") {
- val collation = "UNICODE"
- val query = s"SELECT posexplode(array('a', 'b'));"
- withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
- checkAnswer(
- sql(query),
- Seq(
- Row(0, "a"),
- Row(1, "b")
+ testAdditionalCollations.foreach { collation =>
+ val query = "SELECT posexplode(array('a', 'b'));"
+ withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
Review Comment:
@MaxGekk, yes, I've refactored the new function and all of the tests so that
the sequence of expected types is passed in as an argument. This also covers the
remaining test cases, so it should be much more readable now.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]