cloud-fan commented on code in PR #35041:
URL: https://github.com/apache/spark/pull/35041#discussion_r851923611


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala:
##########
@@ -227,40 +171,49 @@ case class Percentile(
 
   /**
    * Get the percentile value.
-   *
    * This function has been based upon similar function from HIVE
    * `org.apache.hadoop.hive.ql.udf.UDAFPercentile.getPercentile()`.
    */
-  private def getPercentile(aggreCounts: Seq[(AnyRef, Long)], position: 
Double): Double = {
+  private def getPercentile(
+      accumulatedCounts: Seq[(AnyRef, Long)], position: Double): Double = {
     // We may need to do linear interpolation to get the exact percentile
     val lower = position.floor.toLong
     val higher = position.ceil.toLong
 
     // Use binary search to find the lower and the higher position.
-    val countsArray = aggreCounts.map(_._2).toArray[Long]
-    val lowerIndex = binarySearchCount(countsArray, 0, aggreCounts.size, lower 
+ 1)
-    val higherIndex = binarySearchCount(countsArray, 0, aggreCounts.size, 
higher + 1)
+    val countsArray = accumulatedCounts.map(_._2).toArray[Long]
+    val lowerIndex = binarySearchCount(countsArray, 0, accumulatedCounts.size, 
lower + 1)
+    val higherIndex = binarySearchCount(countsArray, 0, 
accumulatedCounts.size, higher + 1)
 
-    val lowerKey = aggreCounts(lowerIndex)._1
+    val lowerKey = accumulatedCounts(lowerIndex)._1
     if (higher == lower) {
       // no interpolation needed because position does not have a fraction
       return toDoubleValue(lowerKey)
     }
 
-    val higherKey = aggreCounts(higherIndex)._1
+    val higherKey = accumulatedCounts(higherIndex)._1
     if (higherKey == lowerKey) {
       // no interpolation needed because lower position and higher position 
has the same key
       return toDoubleValue(lowerKey)
     }
 
-    // Linear interpolation to get the exact percentile
-    (higher - position) * toDoubleValue(lowerKey) + (position - lower) * 
toDoubleValue(higherKey)
+    if (interpolate) {
+      // Linear interpolation to get the exact percentile
+      (higher - position) * toDoubleValue(lowerKey) + (position - lower) * 
toDoubleValue(higherKey)
+    } else {
+      toDoubleValue(lowerKey)
+    }
   }
 
+  /**
+   * Whether value should be interpolated
+   */
+  protected def interpolate: Boolean

Review Comment:
   nit: we can put it near `val reverse: Boolean`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to