kazuyukitanimura commented on code in PR #1424: URL: https://github.com/apache/datafusion-comet/pull/1424#discussion_r1965987283
########## spark/src/main/scala/org/apache/comet/rules/RewriteJoin.scala: ########## @@ -48,7 +64,7 @@ object RewriteJoin extends JoinSelectionHelper { def rewrite(plan: SparkPlan): SparkPlan = plan match { case smj: SortMergeJoinExec => - getBuildSide(smj.joinType) match { + getSmjBuildSide(smj) match { case Some(BuildRight) if smj.joinType == LeftSemi => // TODO this was added as a workaround for TPC-DS q14 hanging and needs // further investigation Review Comment: Not a blocker, but just wondering if this is because we were choosing the wrong side? ########## spark/src/main/scala/org/apache/comet/rules/RewriteJoin.scala: ########## @@ -31,14 +32,29 @@ import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoin */ object RewriteJoin extends JoinSelectionHelper { - private def getBuildSide(joinType: JoinType): Option[BuildSide] = { - if (canBuildShuffledHashJoinRight(joinType)) { - Some(BuildRight) - } else if (canBuildShuffledHashJoinLeft(joinType)) { - Some(BuildLeft) - } else { - None + private def getSmjBuildSide(join: SortMergeJoinExec): Option[BuildSide] = { + val leftBuildable = canBuildShuffledHashJoinLeft(join.joinType) + val rightBuildable = canBuildShuffledHashJoinRight(join.joinType) + if (!leftBuildable && !rightBuildable) { + return None } + if (!leftBuildable) { + return Some(BuildRight) + } + if (!rightBuildable) { + return Some(BuildLeft) + } + val side = join.logicalLink + .flatMap { + case join: Join => Some(getOptimalBuildSide(join)) + case _ => None + } + .getOrElse { + // If smj has no logical link, or its logical link is not a join, + // then we always choose left as build side. + BuildLeft Review Comment: We were previously preferring right, as in: ``` if (canBuildShuffledHashJoinRight(joinType)) { Some(BuildRight) } else if (canBuildShuffledHashJoinLeft(joinType)) { Some(BuildLeft) } else { None } ``` Is this a behavior change? -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org