imback82 commented on a change in pull request #28676:
URL: https://github.com/apache/spark/pull/28676#discussion_r451259932
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala
##########
@@ -60,6 +62,66 @@ case class BroadcastHashJoinExec(
}
}
+ override def outputPartitioning: Partitioning = {
+ joinType match {
+ case _: InnerLike =>
+ streamedPlan.outputPartitioning match {
+ case h: HashPartitioning =>
PartitioningCollection(expandOutputPartitioning(h))
+ case c: PartitioningCollection =>
+ def expand(partitioning: PartitioningCollection): Partitioning = {
+ PartitioningCollection(partitioning.partitionings.flatMap {
+ case h: HashPartitioning => expandOutputPartitioning(h)
+ case c: PartitioningCollection => Seq(expand(c))
+ case other => Seq(other)
+ })
+ }
+ expand(c)
+ case other => other
+ }
+ case _ => streamedPlan.outputPartitioning
+ }
+ }
+
+ // A one-to-many mapping from a streamed key to build keys.
+ private lazy val streamedKeyToBuildKeyMapping = {
+ val mapping = mutable.Map.empty[Expression, Seq[Expression]]
+ streamedKeys.zip(buildKeys).foreach {
+ case (streamedKey, buildKey) =>
+ val key = streamedKey.canonicalized
+ mapping.get(key) match {
+ case Some(v) => mapping.put(key, v :+ buildKey)
+ case None => mapping.put(key, Seq(buildKey))
+ }
+ }
+ mapping.toMap
+ }
+
+ // Expands the given partitioning by substituting streamed keys with build keys.
+ // For example, if the expressions for the given partitioning are Seq("a", "b", "c")
+ // where the streamed keys are Seq("b", "c") and the build keys are Seq("x", "y"),
+ // the expanded partitioning will have the following expressions:
+ // Seq("a", "b", "c"), Seq("a", "b", "y"), Seq("a", "x", "c"), Seq("a", "x", "y").
+ private def expandOutputPartitioning(partitioning: HashPartitioning):
Seq[HashPartitioning] = {
+ def generateExprCombinations(
+ current: Seq[Expression],
+ accumulated: Seq[Expression],
+ all: mutable.ListBuffer[Seq[Expression]]): Unit = {
Review comment:
Changed. Thanks for the suggestion.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]