beliefer commented on code in PR #35041:
URL: https://github.com/apache/spark/pull/35041#discussion_r851111216


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala:
##########
@@ -317,6 +274,78 @@ case class Percentile(
       bis.close()
     }
   }
+}
+
+/**
+ * The Percentile aggregate function returns the exact percentile(s) of 
numeric column `expr` at
+ * the given percentage(s) with value range in [0.0, 1.0].
+ *
+ * Because the number of elements and their partial order cannot be determined 
in advance,
+ * we have to store all the elements in memory. Note that too many elements 
can
+ * cause GC pauses and eventually OutOfMemory errors.
+ *
+ * @param child child expression that produces a numeric column value with 
`child.eval(inputRow)`
+ * @param percentageExpression Expression that represents a single percentage 
value or an array of
+ *                             percentage values. Each percentage value must 
be in the range
+ *                             [0.0, 1.0].
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage =
+    """
+      _FUNC_(col, percentage [, frequency]) - Returns the exact percentile 
value of numeric
+       or ansi interval column `col` at the given percentage. The value of 
percentage must be
+       between 0.0 and 1.0. The value of frequency should be positive integral
+
+      _FUNC_(col, array(percentage1 [, percentage2]...) [, frequency]) - 
Returns the exact
+      percentile value array of numeric column `col` at the given 
percentage(s). Each value
+      of the percentage array must be between 0.0 and 1.0. The value of 
frequency should be
+      positive integral
+
+      """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(col, 0.3) FROM VALUES (0), (10) AS tab(col);
+       3.0
+      > SELECT _FUNC_(col, array(0.25, 0.75)) FROM VALUES (0), (10) AS 
tab(col);
+       [2.5,7.5]
+      > SELECT _FUNC_(col, 0.5) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL 
'10' MONTH) AS tab(col);
+       5.0
+      > SELECT _FUNC_(col, array(0.2, 0.5)) FROM VALUES (INTERVAL '0' SECOND), 
(INTERVAL '10' SECOND) AS tab(col);
+       [2000000.0,5000000.0]
+  """,
+  group = "agg_funcs",
+  since = "2.1.0")
+// scalastyle:on line.size.limit
+case class Percentile(
+    child: Expression,
+    percentageExpression: Expression,
+    frequencyExpression : Expression,
+    mutableAggBufferOffset: Int = 0,
+    inputAggBufferOffset: Int = 0,
+    reverse: Boolean = false) extends PercentileBase {

Review Comment:
   order by col with desc



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to