Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19594#discussion_r156392538
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala ---
    @@ -114,4 +115,183 @@ object EstimationUtils {
         }
       }
     
    +  /**
    +   * Returns overlapped ranges between two histograms, in the given value range [newMin, newMax].
    +   */
    +  def getOverlappedRanges(
    +      leftHistogram: Histogram,
    +      rightHistogram: Histogram,
    +      newMin: Double,
    +      newMax: Double): Seq[OverlappedRange] = {
    +    val overlappedRanges = new ArrayBuffer[OverlappedRange]()
    +    // Only bins whose ranges intersect [newMin, newMax] have join possibility.
    +    val leftBins = leftHistogram.bins
    +      .filter(b => b.lo <= newMax && b.hi >= newMin)
    +    val rightBins = rightHistogram.bins
    +      .filter(b => b.lo <= newMax && b.hi >= newMin)
    +
    +    leftBins.foreach { lb =>
    +      rightBins.foreach { rb =>
    +        val (left, leftHeight) = trimBin(lb, leftHistogram.height, newMin, 
newMax)
    +        val (right, rightHeight) = trimBin(rb, rightHistogram.height, 
newMin, newMax)
    +        // Only collect overlapped ranges.
    +        if (left.lo <= right.hi && left.hi >= right.lo) {
    +          // Collect overlapped ranges.
    +          val range = if (left.lo == left.hi) {
    +            // Case1: the left bin has only one value
    +            OverlappedRange(
    +              lo = left.lo,
    +              hi = left.lo,
    +              leftNdv = 1,
    +              rightNdv = 1,
    +              leftNumRows = leftHeight,
    +              rightNumRows = rightHeight / right.ndv
    +            )
    +          } else if (right.lo == right.hi) {
    +            // Case2: the right bin has only one value
    +            OverlappedRange(
    +              lo = right.lo,
    +              hi = right.lo,
    +              leftNdv = 1,
    +              rightNdv = 1,
    +              leftNumRows = leftHeight / left.ndv,
    +              rightNumRows = rightHeight
    +            )
    +          } else if (right.lo >= left.lo && right.hi >= left.hi) {
    +            // Case3: the left bin is "smaller" than the right bin
    +            //      left.lo            right.lo     left.hi          right.hi
    +            // --------+------------------+------------+----------------+------->
    +            val leftRatio = (left.hi - right.lo) / (left.hi - left.lo)
    +            val rightRatio = (left.hi - right.lo) / (right.hi - right.lo)
    +            if (leftRatio == 0) {
    --- End diff --
    
    It's more understandable to write `if (right.lo == left.hi)` here: `leftRatio` is 0 exactly when the two bins touch only at that single point.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to