MaxGekk commented on code in PR #48400:
URL: https://github.com/apache/spark/pull/48400#discussion_r1806135863
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala:
##########
@@ -397,18 +397,15 @@ trait JoinSelectionHelper extends Logging {
   protected def hashJoinSupported
       (leftKeys: Seq[Expression], rightKeys: Seq[Expression]): Boolean = {
-    val result = leftKeys.concat(rightKeys).forall(e => UnsafeRowUtils.isBinaryStable(e.dataType))
-    if (!result) {
-      val keysNotSupportingHashJoin = leftKeys.concat(rightKeys).filterNot(
-        e => UnsafeRowUtils.isBinaryStable(e.dataType))
-      logWarning(log"Hash based joins are not supported due to joining on keys that don't " +
-        log"support binary equality. Keys not supporting hash joins: " +
-        log"${
-          MDC(HASH_JOIN_KEYS, keysNotSupportingHashJoin.map(
-            e => e.toString + " due to DataType: " + e.dataType.typeName).mkString(", "))
-        }")
-    }
-    result
+    val keysNotSupportingHashJoin = leftKeys.concat(rightKeys).filterNot(
+      e => UnsafeRowUtils.isBinaryStable(e.dataType))
+    // `RewriteCollationJoin` should have been applied before this rule, so all keys in the
+    // join conditions should be binary stable (i.e. support binary equality comparison).
+    assert(keysNotSupportingHashJoin.isEmpty, "Hash based joins are not supported due to " +
Review Comment:
How about raising `SparkException.internalError` instead of the assert?
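
For illustration, here is a minimal sketch of what the suggestion could look like, reusing the `keysNotSupportingHashJoin` check from the diff above; the exact error message wording is illustrative, not taken from the PR:

```scala
import org.apache.spark.SparkException

// Collect any join keys whose data types are not binary stable (e.g. collated strings).
val keysNotSupportingHashJoin = leftKeys.concat(rightKeys).filterNot(
  e => UnsafeRowUtils.isBinaryStable(e.dataType))
// `RewriteCollationJoin` should have rewritten such keys already, so reaching this branch
// indicates an optimizer bug; report it as a classified internal error rather than
// failing with an AssertionError from `assert`.
if (keysNotSupportingHashJoin.nonEmpty) {
  throw SparkException.internalError(
    "Hash based joins are not supported due to joining on keys that don't support " +
      "binary equality: " + keysNotSupportingHashJoin.map(
        e => s"$e due to DataType: ${e.dataType.typeName}").mkString(", "))
}
true
```

Compared with `assert`, `SparkException.internalError` throws a `SparkException` tagged with the `INTERNAL_ERROR` error class, so the failure would be surfaced through Spark's regular error framework instead of a bare `AssertionError`.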