GideonPotok commented on code in PR #47154:
URL: https://github.com/apache/spark/pull/47154#discussion_r1687941219
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala:
##########
@@ -1789,44 +1798,90 @@ class CollationSQLExpressionsSuite
s"named_struct('f2', collate('$elt', '${t.collationId}')), 'f3',
1)").mkString(",")
}.mkString(",")
- val tableName = s"t_${t.collationId}_mode_nested_struct"
+ val tableName = s"t_${t.collationId}_mode_nested_struct1"
withTable(tableName) {
sql(s"CREATE TABLE ${tableName}(i STRUCT<f1: STRUCT<f2: STRING COLLATE
" +
t.collationId + ">, f3: INT>) USING parquet")
sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd)
val query = s"SELECT lower(mode(i).f1.f2) FROM ${tableName}"
- if(t.collationId == "UTF8_LCASE" ||
- t.collationId == "unicode_ci" ||
- t.collationId == "unicode") {
- // Cannot resolve "mode(i)" due to data type mismatch:
- // Input to function mode was a complex type with strings collated
on non-binary
- // collations, which is not yet supported.. SQLSTATE: 42K09; line 1
pos 13;
- val params = Seq(("sqlExpr", "\"mode(i)\""),
- ("msg", "The input to the function 'mode' " +
- "was a type of binary-unstable type that is not currently
supported by mode."),
- ("hint", "")).toMap
- checkError(
- exception = intercept[AnalysisException] {
- sql(query)
- },
- errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT",
- parameters = params,
- queryContext = Array(
- ExpectedContext(objectType = "",
- objectName = "",
- startIndex = 13,
- stopIndex = 19,
- fragment = "mode(i)")
- )
- )
- } else {
- checkAnswer(sql(query), Row(t.result))
- }
+ checkAnswer(sql(query), Row(t.result))
}
})
}
test("Support mode for string expression with collated strings in array
complex type") {
+ case class ModeTestCase[R](collationId: String, bufferValues: Map[String,
Long], result: R)
+ val testCases = Seq(
+ ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"),
+ ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"),
+ ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"),
+ ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b")
+ )
+ testCases.foreach(t => {
+ val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) =>
+ (0L to numRepeats).map(_ => s"array(named_struct('f2', " +
+ s"collate('$elt', '${t.collationId}'), 'f3', 1))").mkString(",")
+ }.mkString(",")
+
+ val tableName = s"t_${t.collationId}_mode_nested_struct2"
+ withTable(tableName) {
+ sql(s"CREATE TABLE ${tableName}(" +
+ s"i ARRAY< STRUCT<f2: STRING COLLATE ${t.collationId}, f3: INT>>)" +
+ s" USING parquet")
+ sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd)
+ val query = s"SELECT lower(element_at(mode(i).f2, 1)) FROM
${tableName}"
+ checkAnswer(sql(query), Row(t.result))
+ }
+ })
+ }
+
+ test("Support mode for string expression with collated strings in 3D array
type") {
Review Comment:
@uros-db
I have implemented a test similar to the one you suggested; it is named
`Support mode for string expression with collated complex type - Highly nested.`

I like this test because by testing a type where an array contains another
array directly (i.e., `array<array<...>>`), we can verify the execution path
where `getBufferForArrayType` calls `recursivelyGetBufferForArrayType`.
To ensure thorough testing, I may even want to extend this to a 4D array, to
confirm that `recursivelyGetBufferForArrayType` can successfully call itself.
Essentially, having tests for 3D (and 4D) arrays gives us real _branch
coverage_ of the recursion. Without these tests, we would achieve little more
than _line coverage_ and would not validate the recursive behavior.
Thoughts?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]