viirya commented on a change in pull request #29104:
URL: https://github.com/apache/spark/pull/29104#discussion_r472551043
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
##########
@@ -903,15 +910,61 @@ private[joins] object LongHashedRelation {
if (!rowKey.isNullAt(0)) {
val key = rowKey.getLong(0)
map.append(key, unsafeRow)
+ } else if (isNullAware) {
+ return EmptyHashedRelationWithAllNullKeys
}
}
map.optimize()
new LongHashedRelation(numFields, map)
}
}
+/**
+ * Common trait with dummy implementation for NAAJ special HashedRelation
+ * EmptyHashedRelation
+ * EmptyHashedRelationWithAllNullKeys
+ */
+trait NullAwareHashedRelation extends HashedRelation with Externalizable {
+ override def get(key: InternalRow): Iterator[InternalRow] = {
+ throw new UnsupportedOperationException
+ }
+
+ override def getValue(key: InternalRow): InternalRow = {
+ throw new UnsupportedOperationException
+ }
+
+ override def keyIsUnique: Boolean = true
+
+ override def keys(): Iterator[InternalRow] = {
+ throw new UnsupportedOperationException
+ }
+
+ override def close(): Unit = {}
+
+ override def writeExternal(out: ObjectOutput): Unit = {}
+
+ override def readExternal(in: ObjectInput): Unit = {}
+
+ override def estimatedSize: Long = 0
+}
+
+/**
+ * A special HashedRelation indicates it built from a empty
input:Iterator[InternalRow].
+ */
+object EmptyHashedRelation extends NullAwareHashedRelation {
+ override def asReadOnlyCopy(): EmptyHashedRelation.type = this
+}
+
+/**
+ * A special HashedRelation indicates it built from a non-empty
input:Iterator[InternalRow],
+ * which contains all null columns key.
+ */
+object EmptyHashedRelationWithAllNullKeys extends NullAwareHashedRelation {
+ override def asReadOnlyCopy(): EmptyHashedRelationWithAllNullKeys.type = this
Review comment:
This object name is really confusing: `EmptyHashedRelation` is built from empty
input, while `EmptyHashedRelationWithAllNullKeys` is built from non-empty input.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]