uros-db commented on code in PR #47154:
URL: https://github.com/apache/spark/pull/47154#discussion_r1687958443
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala:
##########
@@ -1789,44 +1798,90 @@ class CollationSQLExpressionsSuite
s"named_struct('f2', collate('$elt', '${t.collationId}')), 'f3',
1)").mkString(",")
}.mkString(",")
- val tableName = s"t_${t.collationId}_mode_nested_struct"
+ val tableName = s"t_${t.collationId}_mode_nested_struct1"
withTable(tableName) {
sql(s"CREATE TABLE ${tableName}(i STRUCT<f1: STRUCT<f2: STRING COLLATE
" +
t.collationId + ">, f3: INT>) USING parquet")
sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd)
val query = s"SELECT lower(mode(i).f1.f2) FROM ${tableName}"
- if(t.collationId == "UTF8_LCASE" ||
- t.collationId == "unicode_ci" ||
- t.collationId == "unicode") {
- // Cannot resolve "mode(i)" due to data type mismatch:
- // Input to function mode was a complex type with strings collated
on non-binary
- // collations, which is not yet supported.. SQLSTATE: 42K09; line 1
pos 13;
- val params = Seq(("sqlExpr", "\"mode(i)\""),
- ("msg", "The input to the function 'mode' " +
- "was a type of binary-unstable type that is not currently
supported by mode."),
- ("hint", "")).toMap
- checkError(
- exception = intercept[AnalysisException] {
- sql(query)
- },
- errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT",
- parameters = params,
- queryContext = Array(
- ExpectedContext(objectType = "",
- objectName = "",
- startIndex = 13,
- stopIndex = 19,
- fragment = "mode(i)")
- )
- )
- } else {
- checkAnswer(sql(query), Row(t.result))
- }
+ checkAnswer(sql(query), Row(t.result))
}
})
}
test("Support mode for string expression with collated strings in array
complex type") {
+ case class ModeTestCase[R](collationId: String, bufferValues: Map[String,
Long], result: R)
+ val testCases = Seq(
+ ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"),
+ ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"),
+ ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"),
+ ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b")
+ )
+ testCases.foreach(t => {
+ val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) =>
+ (0L to numRepeats).map(_ => s"array(named_struct('f2', " +
+ s"collate('$elt', '${t.collationId}'), 'f3', 1))").mkString(",")
+ }.mkString(",")
+
+ val tableName = s"t_${t.collationId}_mode_nested_struct2"
+ withTable(tableName) {
+ sql(s"CREATE TABLE ${tableName}(" +
+ s"i ARRAY< STRUCT<f2: STRING COLLATE ${t.collationId}, f3: INT>>)" +
+ s" USING parquet")
+ sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd)
+ val query = s"SELECT lower(element_at(mode(i).f2, 1)) FROM
${tableName}"
+ checkAnswer(sql(query), Row(t.result))
+ }
+ })
+ }
+
+ test("Support mode for string expression with collated strings in 3D array
type") {
Review Comment:
This recursive implementation does not seem very readable. Have you tried
something like what I proposed above (logic similar to `RewriteCollationJoin`)?
Can we just have:
`def getCollationAwareBufferForDataType ...`
which does something like
```
dataType match {
// Short-circuit if there is no collation.
case _ if UnsafeRowUtils.isBinaryStable(child.dataType) => ...
case st: StringType =>
getCollationAwareBufferForStringType(...)
case at: ArrayType =>
getCollationAwareBufferForArrayType(...)
case st : StructType =>
getCollationAwareBufferForStructType(...)
// Write a comment to point out that we don't support MapType (and
// possibly some other types)...
case _ => ...
}
```
And then we can also have:
```
def getCollationAwareBufferForStringType ...
def getCollationAwareBufferForArrayType ...
def getCollationAwareBufferForStructType ...
```
(These can call `getCollationAwareBufferForDataType` instead of calling
each other, to ensure full recursive support.)
Can we limit the implementation like this? This should help avoid handling
`StructType` in `private def recursivelyGetBufferForArrayType` (and vice versa).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]