uros-db commented on code in PR #48386:
URL: https://github.com/apache/spark/pull/48386#discussion_r1798982022
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala:
##########
@@ -173,8 +173,9 @@ abstract class HashMapGenerator(
${hashBytes(bytes)}
"""
}
- case st: StringType if st.supportsBinaryEquality =>
hashBytes(s"$input.getBytes()")
- case st: StringType if !st.supportsBinaryEquality =>
+ case st: StringType if st.supportsBinaryEquality &&
!st.usesTrimCollation =>
+ hashBytes(s"$input.getBytes()")
+ case st: StringType if !st.supportsBinaryEquality ||
st.usesTrimCollation =>
Review Comment:
Ditto: please add the same TODO comment here as well.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtils.scala:
##########
@@ -205,7 +205,9 @@ object UnsafeRowUtils {
* can lead to rows being semantically equal even though their binary
representations differ).
*/
def isBinaryStable(dataType: DataType): Boolean =
!dataType.existsRecursively {
- case st: StringType =>
!CollationFactory.fetchCollation(st.collationId).supportsBinaryEquality
+ case st: StringType =>
+ val collation = CollationFactory.fetchCollation(st.collationId)
+ (!collation.supportsBinaryEquality || collation.supportsSpaceTrimming)
Review Comment:
Ditto: please add the same TODO comment here as well.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]