Github user wzhfy commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19531#discussion_r147547985
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala ---
    @@ -28,45 +28,43 @@ import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Join, Statistics
     import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils._
     
     
    -object JoinEstimation extends Logging {
    +case class JoinEstimation(join: Join) extends Logging {
    +
    +  private val leftStats = join.left.stats
    +  private val rightStats = join.right.stats
    +  private val keyStatsAfterJoin = new mutable.HashMap[Attribute, ColumnStat]()
    +
       /**
        * Estimate statistics after join. Return `None` if the join type is not supported, or we don't
        * have enough statistics for estimation.
        */
    -  def estimate(join: Join): Option[Statistics] = {
    +  def estimate: Option[Statistics] = {
         join.joinType match {
           case Inner | Cross | LeftOuter | RightOuter | FullOuter =>
    -        InnerOuterEstimation(join).doEstimate()
    +        estimateInnerOuterJoin()
           case LeftSemi | LeftAnti =>
    -        LeftSemiAntiEstimation(join).doEstimate()
    +        estimateLeftSemiAntiJoin()
           case _ =>
             logDebug(s"[CBO] Unsupported join type: ${join.joinType}")
             None
         }
       }
    -}
    -
    -case class InnerOuterEstimation(join: Join) extends Logging {
    -
    -  private val leftStats = join.left.stats
    -  private val rightStats = join.right.stats
     
       /**
        * Estimate output size and number of rows after a join operator, and update output column stats.
        */
    -  def doEstimate(): Option[Statistics] = join match {
    +  private def estimateInnerOuterJoin(): Option[Statistics] = join match {
         case _ if !rowCountsExist(join.left, join.right) =>
           None
     
         case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, _, _, _) =>
           // 1. Compute join selectivity
           val joinKeyPairs = extractJoinKeysWithColStats(leftKeys, rightKeys)
    -      val selectivity = joinSelectivity(joinKeyPairs)
    +      val innerJoinedRows = joinCardinality(joinKeyPairs)
    --- End diff --
    
    Thanks, I'll make the changes.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to