Github user yhuai commented on a diff in the pull request:
https://github.com/apache/spark/pull/6370#discussion_r30941653
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/functions.scala ---
@@ -326,168 +326,135 @@ object functions {
//////////////////////////////////////////////////////////////////////////////////////////////
/**
- * Window function: returns the lag value of current row of the
expression,
- * null when the current row extends before the beginning of the window.
+ * Window function: returns the value that is `offset` rows before the
current row, and
+ * `null` if there is less than `offset` rows before the current row.
For example,
+ * an `offset` of one will return the previous row at any given point in
the window partition.
*
- * @group window_funcs
- * @since 1.4.0
- */
- def lag(columnName: String): Column = {
- lag(columnName, 1)
- }
-
- /**
- * Window function: returns the lag value of current row of the column,
- * null when the current row extends before the beginning of the window.
+ * This is equivalent to the LAG function in SQL.
*
* @group window_funcs
* @since 1.4.0
*/
- def lag(e: Column): Column = {
- lag(e, 1)
+ def lag(e: Column, offset: Int): Column = {
+ lag(e, offset, null)
}
/**
- * Window function: returns the lag values of current row of the
expression,
- * null when the current row extends before the beginning of the window.
+ * Window function: returns the value that is `offset` rows before the
current row, and
+ * `null` if there is less than `offset` rows before the current row.
For example,
+ * an `offset` of one will return the previous row at any given point in
the window partition.
*
- * @group window_funcs
- * @since 1.4.0
- */
- def lag(e: Column, count: Int): Column = {
- lag(e, count, null)
- }
-
- /**
- * Window function: returns the lag values of current row of the column,
- * null when the current row extends before the beginning of the window.
+ * This is equivalent to the LAG function in SQL.
*
* @group window_funcs
* @since 1.4.0
*/
- def lag(columnName: String, count: Int): Column = {
- lag(columnName, count, null)
+ def lag(columnName: String, offset: Int): Column = {
+ lag(columnName, offset, null)
}
/**
- * Window function: returns the lag values of current row of the column,
- * given default value when the current row extends before the beginning
- * of the window.
+ * Window function: returns the value that is `offset` rows before the
current row, and
+ * `defaultValue` if there is less than `offset` rows before the current
row. For example,
+ * an `offset` of one will return the previous row at any given point in
the window partition.
*
- * @group window_funcs
- * @since 1.4.0
- */
- def lag(columnName: String, count: Int, defaultValue: Any): Column = {
- lag(Column(columnName), count, defaultValue)
- }
-
- /**
- * Window function: returns the lag values of current row of the
expression,
- * given default value when the current row extends before the beginning
- * of the window.
+ * This is equivalent to the LAG function in SQL.
*
* @group window_funcs
* @since 1.4.0
*/
- def lag(e: Column, count: Int, defaultValue: Any): Column = {
- UnresolvedWindowFunction("lag", e.expr :: Literal(count) ::
Literal(defaultValue) :: Nil)
+ def lag(columnName: String, offset: Int, defaultValue: Any): Column = {
+ lag(Column(columnName), offset, defaultValue)
}
/**
- * Window function: returns the lead value of current row of the column,
- * null when the current row extends before the end of the window.
+ * Window function: returns the value that is `offset` rows before the
current row, and
+ * `defaultValue` if there is less than `offset` rows before the current
row. For example,
+ * an `offset` of one will return the previous row at any given point in
the window partition.
*
- * @group window_funcs
- * @since 1.4.0
- */
- def lead(columnName: String): Column = {
- lead(columnName, 1)
- }
-
- /**
- * Window function: returns the lead value of current row of the
expression,
- * null when the current row extends before the end of the window.
+ * This is equivalent to the LAG function in SQL.
*
* @group window_funcs
* @since 1.4.0
*/
- def lead(e: Column): Column = {
- lead(e, 1)
+ def lag(e: Column, offset: Int, defaultValue: Any): Column = {
+ UnresolvedWindowFunction("lag", e.expr :: Literal(offset) ::
Literal(defaultValue) :: Nil)
}
/**
- * Window function: returns the lead values of current row of the column,
- * null when the current row extends before the end of the window.
+ * Window function: returns the value that is `offset` rows after the
current row, and
+ * `null` if there is less than `offset` rows after the current row. For
example,
+ * an `offset` of one will return the next row at any given point in the
window partition.
*
- * @group window_funcs
- * @since 1.4.0
- */
- def lead(columnName: String, count: Int): Column = {
- lead(columnName, count, null)
- }
-
- /**
- * Window function: returns the lead values of current row of the
expression,
- * null when the current row extends before the end of the window.
+ * This is equivalent to the LEAD function in SQL.
*
* @group window_funcs
* @since 1.4.0
*/
- def lead(e: Column, count: Int): Column = {
- lead(e, count, null)
+ def lead(columnName: String, offset: Int): Column = {
+ lead(columnName, offset, null)
}
/**
- * Window function: returns the lead values of current row of the column,
- * given default value when the current row extends before the end of
the window.
+ * Window function: returns the value that is `offset` rows after the
current row, and
+ * `null` if there is less than `offset` rows after the current row. For
example,
+ * an `offset` of one will return the next row at any given point in the
window partition.
+ *
+ * This is equivalent to the LEAD function in SQL.
*
* @group window_funcs
* @since 1.4.0
*/
- def lead(columnName: String, count: Int, defaultValue: Any): Column = {
- lead(Column(columnName), count, defaultValue)
+ def lead(e: Column, offset: Int): Column = {
+ lead(e, offset, null)
}
/**
- * Window function: returns the lead values of current row of the
expression,
- * given default value when the current row extends before the end of
the window.
+ * Window function: returns the value that is `offset` rows after the
current row, and
+ * `defaultValue` if there is less than `offset` rows after the current
row. For example,
+ * an `offset` of one will return the next row at any given point in the
window partition.
+ *
+ * This is equivalent to the LEAD function in SQL.
*
* @group window_funcs
* @since 1.4.0
*/
- def lead(e: Column, count: Int, defaultValue: Any): Column = {
- UnresolvedWindowFunction("lead", e.expr :: Literal(count) ::
Literal(defaultValue) :: Nil)
+ def lead(columnName: String, offset: Int, defaultValue: Any): Column = {
+ lead(Column(columnName), offset, defaultValue)
}
/**
- * NTILE for specified expression.
- * NTILE allows easy calculation of tertiles, quartiles, deciles and
other
- * common summary statistics. This function divides an ordered partition
into a specified
- * number of groups called buckets and assigns a bucket number to each
row in the partition.
+ * Window function: returns the value that is `offset` rows after the
current row, and
+ * `defaultValue` if there is less than `offset` rows after the current
row. For example,
+ * an `offset` of one will return the next row at any given point in the
window partition.
+ *
+ * This is equivalent to the LEAD function in SQL.
*
* @group window_funcs
* @since 1.4.0
*/
- def ntile(e: Column): Column = {
- UnresolvedWindowFunction("ntile", e.expr :: Nil)
+ def lead(e: Column, offset: Int, defaultValue: Any): Column = {
+ UnresolvedWindowFunction("lead", e.expr :: Literal(offset) ::
Literal(defaultValue) :: Nil)
}
/**
- * NTILE for specified column.
- * NTILE allows easy calculation of tertiles, quartiles, deciles and
other
- * common summary statistics. This function divides an ordered partition
into a specified
- * number of groups called buckets and assigns a bucket number to each
row in the partition.
+ * Window function: returns a group id from 1 to `n` (inclusive) in a
round-robin fashion in
+ * a window partition. For example, if `n` is 3, the first row will get
1, the second row will
+ * get 2, the third row will get 3, and the fourth row will get 1...
+ *
+ * This is equivalent to the NTILE function in SQL.
--- End diff --
This one looks weird. What does round-robin mean here? Also, see
https://msdn.microsoft.com/en-us/library/ms175126.aspx
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]