Github user wzhfy commented on a diff in the pull request:
https://github.com/apache/spark/pull/19714#discussion_r153969180
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala ---
@@ -149,10 +147,47 @@ abstract class SparkStrategies extends
QueryPlanner[SparkPlan] {
case _ => false
}
+ private def broadcastSide(
+ canBuildLeft: Boolean,
+ canBuildRight: Boolean,
+ left: LogicalPlan,
+ right: LogicalPlan): BuildSide = {
+
+ def smallerSide =
+ if (right.stats.sizeInBytes <= left.stats.sizeInBytes) BuildRight
else BuildLeft
+
+ val buildRight = canBuildRight && right.stats.hints.broadcast
+ val buildLeft = canBuildLeft && left.stats.hints.broadcast
+
+
+ if (buildRight && buildLeft) {
+ // Broadcast smaller side base on its estimated physical size
+ // if both sides have broadcast hint
+ smallerSide
+ } else if (buildRight) {
+ BuildRight
+ } else if (buildLeft) {
+ BuildLeft
+ } else if (canBuildRight && canBuildLeft) {
+ // for the last default broadcast nested loop join
+ smallerSide
+ } else {
+ throw new AnalysisException(
+ "Can not decide to use which side for BuildSide for this join")
--- End diff --
To be consistent with the method name, how about: `Can not decide which
side to broadcast for this join`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]