cloud-fan commented on code in PR #35041:
URL: https://github.com/apache/spark/pull/35041#discussion_r851093553
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala:
##########
@@ -317,6 +274,78 @@ case class Percentile(
bis.close()
}
}
+}
+
+/**
+ * The Percentile aggregate function returns the exact percentile(s) of
numeric column `expr` at
+ * the given percentage(s) with value range in [0.0, 1.0].
+ *
+ * Because the number of elements and their partial order cannot be determined
in advance,
+ * we have to store all the elements in memory. Note that
too many elements can
+ * cause GC pauses and eventually OutOfMemory errors.
+ *
+ * @param child child expression that produces a numeric column value with
`child.eval(inputRow)`
+ * @param percentageExpression Expression that represents a single percentage
value or an array of
+ * percentage values. Each percentage value must
be in the range
+ * [0.0, 1.0].
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage =
+ """
+ _FUNC_(col, percentage [, frequency]) - Returns the exact percentile
value of numeric
+ or ansi interval column `col` at the given percentage. The value of
percentage must be
+ between 0.0 and 1.0. The value of frequency should be positive integral
+
+ _FUNC_(col, array(percentage1 [, percentage2]...) [, frequency]) -
Returns the exact
+ percentile value array of numeric column `col` at the given
percentage(s). Each value
+ of the percentage array must be between 0.0 and 1.0. The value of
frequency should be
+ positive integral
+
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_(col, 0.3) FROM VALUES (0), (10) AS tab(col);
+ 3.0
+ > SELECT _FUNC_(col, array(0.25, 0.75)) FROM VALUES (0), (10) AS
tab(col);
+ [2.5,7.5]
+ > SELECT _FUNC_(col, 0.5) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL
'10' MONTH) AS tab(col);
+ 5.0
+ > SELECT _FUNC_(col, array(0.2, 0.5)) FROM VALUES (INTERVAL '0' SECOND),
(INTERVAL '10' SECOND) AS tab(col);
+ [2000000.0,5000000.0]
+ """,
+ group = "agg_funcs",
+ since = "2.1.0")
+// scalastyle:on line.size.limit
+case class Percentile(
+ child: Expression,
+ percentageExpression: Expression,
+ frequencyExpression : Expression,
+ mutableAggBufferOffset: Int = 0,
+ inputAggBufferOffset: Int = 0,
+ reverse: Boolean = false) extends PercentileBase {
+
+ def this(child: Expression, percentageExpression: Expression) = {
+ this(child, percentageExpression, Literal(1L), 0, 0)
+ }
+
+ def this(child: Expression, percentageExpression: Expression, frequency:
Expression) = {
+ this(child, percentageExpression, frequency, 0, 0)
+ }
+
+ def this(child: Expression, percentageExpression: Expression, reverse:
Boolean) = {
+ this(child, percentageExpression, Literal(1L), reverse = reverse)
+ }
+
+ override def prettyName: String = "percentile"
+
+ override val interpolate: Boolean = true
Review Comment:
```suggestion
override def interpolate: Boolean = true
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]