Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19594#discussion_r156388437
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala ---
    @@ -114,4 +115,183 @@ object EstimationUtils {
         }
       }
     
    +  /**
    +   * Returns overlapped ranges between two histograms, in the given value range [newMin, newMax].
    +   */
    +  def getOverlappedRanges(
    +      leftHistogram: Histogram,
    +      rightHistogram: Histogram,
    +      newMin: Double,
    +      newMax: Double): Seq[OverlappedRange] = {
    +    val overlappedRanges = new ArrayBuffer[OverlappedRange]()
    +    // Only bins whose range intersect [newMin, newMax] have join possibility.
    +    val leftBins = leftHistogram.bins
    +      .filter(b => b.lo <= newMax && b.hi >= newMin)
    +    val rightBins = rightHistogram.bins
    +      .filter(b => b.lo <= newMax && b.hi >= newMin)
    +
    +    leftBins.foreach { lb =>
    +      rightBins.foreach { rb =>
    --- End diff --
    
    nit:
    ```
    for {
      leftBin <- leftBins
      rightBin <- rightBins
    } yield {
      ...
      OverlappedRange ...
    }
    ```
    Then we can omit `val overlappedRanges = new ArrayBuffer[OverlappedRange]()`


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to