GideonPotok commented on code in PR #47154:
URL: https://github.com/apache/spark/pull/47154#discussion_r1756858343
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala:
##########
@@ -1852,40 +1888,67 @@ class CollationSQLExpressionsSuite
s"array(collate('$elt', '${t.collationId}'))), 'f3',
1))").mkString(",")
}.mkString(",")
- val tableName = s"t_${t.collationId}_mode_nested_struct"
+ val tableName = s"t_${t.collationId}_mode_highly_nested_struct"
withTable(tableName) {
sql(s"CREATE TABLE ${tableName}(" +
s"i ARRAY<STRUCT<s1: STRUCT<a2: ARRAY<STRING COLLATE
${t.collationId}>>, f3: INT>>)" +
s" USING parquet")
sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd)
val query = s"SELECT lower(element_at(element_at(mode(i), 1).s1.a2,
1)) FROM ${tableName}"
- if(t.collationId == "UTF8_LCASE" ||
- t.collationId == "unicode_ci" || t.collationId == "unicode") {
- val params = Seq(("sqlExpr", "\"mode(i)\""),
- ("msg", "The input to the function 'mode' was a type" +
- " of binary-unstable type that is not currently supported by
mode."),
- ("hint", "")).toMap
- checkError(
- exception = intercept[AnalysisException] {
- sql(query)
- },
- errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT",
- parameters = params,
- queryContext = Array(
- ExpectedContext(objectType = "",
- objectName = "",
- startIndex = 35,
- stopIndex = 41,
- fragment = "mode(i)")
- )
- )
- } else {
- checkAnswer(sql(query), Row(t.result))
- }
+ checkAnswer(sql(query), Row(t.result))
}
})
}
+ test("Support mode for string expression with collated strings in " +
+ "recursively nested struct with map with collated keys") {
+ case class ModeTestCase[R](collationId: String, bufferValues:
Map[String, Long], result: R)
+ val testCases = Seq(
+ ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{a
-> 1}"),
+ ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{a ->
1}"),
+ ModeTestCase("utf8_lcase", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{b
-> 1}"),
+ ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{b
-> 1}")
+ )
+ testCases.foreach(t => {
+ val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) =>
+ (0L to numRepeats).map(_ =>
+ s"named_struct('m1', map(collate('$elt', '${t.collationId}'),
1))").mkString(",")
+ }.mkString(",")
+
+ val tableName = s"t_${t.collationId}_mode_nested_map_struct1"
+ withTable(tableName) {
+ val creation = s"CREATE TABLE ${tableName}(i STRUCT<m1: MAP<STRING
COLLATE " +
+ t.collationId + ", INT>>) USING parquet"
+ sql(creation)
+ val insertion = s"INSERT INTO ${tableName} VALUES " + valuesToAdd
+ sql(insertion)
+ val query = s"SELECT lower(cast(mode(i).m1 as string))" +
+ s" FROM ${tableName}"
+ if (t.collationId == "utf8_binary") {
+ checkAnswer(sql(query), Row(t.result))
+ } else {
+ checkError(
+ exception = intercept[AnalysisException] {
+ val testQuery = sql(query)
+ testQuery.collect()
+ },
+ errorClass = "DATATYPE_MISMATCH.UNSUPPORTED_MODE_DATA_TYPE",
Review Comment:
@MaxGekk
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org