This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 1139e9b [SPARK-31390][SQL][DOCS] Document Window Function in SQL Syntax Section 1139e9b is described below commit 1139e9b50150e3a99a9c8df0ed57d3fd2b391788 Author: Huaxin Gao <huax...@us.ibm.com> AuthorDate: Sat Apr 18 09:31:52 2020 +0900 [SPARK-31390][SQL][DOCS] Document Window Function in SQL Syntax Section ### What changes were proposed in this pull request? Document Window Function in SQL syntax ### Why are the changes needed? Make SQL Reference complete ### Does this PR introduce any user-facing change? Yes <img width="1050" alt="Screen Shot 2020-04-16 at 9 13 34 PM" src="https://user-images.githubusercontent.com/13592258/79531509-7bf5af00-8027-11ea-8291-a91b2e97a1b5.png"> <img width="1050" alt="Screen Shot 2020-04-16 at 9 14 12 PM" src="https://user-images.githubusercontent.com/13592258/79531514-7e580900-8027-11ea-8761-4c5a888c476f.png"> <img width="1050" alt="Screen Shot 2020-04-16 at 9 14 45 PM" src="https://user-images.githubusercontent.com/13592258/79531518-82842680-8027-11ea-876f-6375aa5b5ead.png"> <img width="1050" alt="Screen Shot 2020-04-16 at 9 15 10 PM" src="https://user-images.githubusercontent.com/13592258/79531521-844dea00-8027-11ea-8948-712f054d42ee.png"> <img width="1050" alt="Screen Shot 2020-04-16 at 9 15 25 PM" src="https://user-images.githubusercontent.com/13592258/79531528-8748da80-8027-11ea-9dae-a465286982ac.png"> ### How was this patch tested? Manually build and check Closes #28220 from huaxingao/sql-win-fun. Authored-by: Huaxin Gao <huax...@us.ibm.com> Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> (cherry picked from commit 142f43629c42ad750d9b506283191aa830d95c08) Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> --- docs/_data/menu-sql.yaml | 2 + docs/sql-ref-syntax-qry-window.md | 190 +++++++++++++++++++++++++++++++++++++- 2 files changed, 189 insertions(+), 3 deletions(-) diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index 7827a0f..5042c2588 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -168,6 +168,8 @@ url: sql-ref-syntax-qry-select-inline-table.html - text: Common Table Expression url: sql-ref-syntax-qry-select-cte.html + - text: Window Function + url: sql-ref-syntax-qry-window.html - text: EXPLAIN url: sql-ref-syntax-qry-explain.html - text: Auxiliary Statements diff --git a/docs/sql-ref-syntax-qry-window.md b/docs/sql-ref-syntax-qry-window.md index 767f477..4ec1af7 100644 --- a/docs/sql-ref-syntax-qry-window.md +++ b/docs/sql-ref-syntax-qry-window.md @@ -1,7 +1,7 @@ --- layout: global -title: Windowing Analytic Functions -displayTitle: Windowing Analytic Functions +title: Window Functions +displayTitle: Window Functions license: | Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with @@ -19,4 +19,188 @@ license: | limitations under the License. --- -**This page is under construction** +### Description + +Window functions operate on a group of rows, referred to as a window, and calculate a return value for each row based on the group of rows. Window functions are useful for processing tasks such as calculating a moving average, computing a cumulative statistic, or accessing the value of rows given the relative position of the current row. + +### Syntax + +{% highlight sql %} +window_function OVER +( [ { PARTITION | DISTRIBUTE } BY partition_col_name = partition_col_val ( [ , ... ] ) ] + { ORDER | SORT } BY expression [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [ , ... ] + [ window_frame ] ) +{% endhighlight %} + +### Parameters + +<dl> + <dt><code><em>window_function</em></code></dt> + <dd> + <ul> + <li>Ranking Functions</li> + <br> + <b>Syntax:</b> + <code> + RANK | DENSE_RANK | PERCENT_RANK | NTILE | ROW_NUMBER + </code> + </ul> + <ul> + <li>Analytic Functions</li> + <br> + <b>Syntax:</b> + <code> + CUME_DIST | LAG | LEAD + </code> + </ul> + <ul> + <li>Aggregate Functions</li> + <br> + <b>Syntax:</b> + <code> + MAX | MIN | COUNT | SUM | AVG | ... + </code> + <br> + Please refer to the <a href="api/sql/">Built-in Functions</a> document for a complete list of Spark aggregate functions. + </ul> + </dd> +</dl> +<dl> + <dt><code><em>window_frame</em></code></dt> + <dd> + Specifies which row to start the window on and where to end it.<br> + <b>Syntax:</b><br> + <code>{ RANGE | ROWS } { frame_start | BETWEEN frame_start AND frame_end }</code><br> + If frame_end is omitted it defaults to CURRENT ROW.<br><br> + <ul> + <code>frame_start</code> and <code>frame_end</code> have the following syntax<br> + <b>Syntax:</b><br> + <code> + UNBOUNDED PRECEDING | offset PRECEDING | CURRENT ROW | offset FOLLOWING | UNBOUNDED FOLLOWING + </code><br> + <code>offset:</code>specifies the <code>offset</code> from the position of the current row. + </ul> + </dd> +</dl> + +### Examples + +{% highlight sql %} +CREATE TABLE employees (name STRING, dept STRING, salary INT, age INT); + +INSERT INTO employees VALUES ("Lisa", "Sales", 10000, 35); +INSERT INTO employees VALUES ("Evan", "Sales", 32000, 38); +INSERT INTO employees VALUES ("Fred", "Engineering", 21000, 28); +INSERT INTO employees VALUES ("Alex", "Sales", 30000, 33); +INSERT INTO employees VALUES ("Tom", "Engineering", 23000, 33); +INSERT INTO employees VALUES ("Jane", "Marketing", 29000, 28); +INSERT INTO employees VALUES ("Jeff", "Marketing", 35000, 38); +INSERT INTO employees VALUES ("Paul", "Engineering", 29000, 23); +INSERT INTO employees VALUES ("Chloe", "Engineering", 23000, 25); + +SELECT * FROM employees; + +-----+-----------+------+-----+ + | name| dept|salary| age| + +-----+-----------+------+-----+ + |Chloe|Engineering| 23000| 25| + | Fred|Engineering| 21000| 28| + | Paul|Engineering| 29000| 23| + |Helen| Marketing| 29000| 40| + | Tom|Engineering| 23000| 33| + | Jane| Marketing| 29000| 28| + | Jeff| Marketing| 35000| 38| + | Evan| Sales| 32000| 38| + | Lisa| Sales| 10000| 35| + | Alex| Sales| 30000| 33| + +-----+-----------+------+-----+ + +SELECT name, dept, RANK() OVER (PARTITION BY dept ORDER BY salary) AS rank FROM employees; + +-----+-----------+------+----+ + | name| dept|salary|rank| + +-----+-----------+------+----+ + | Lisa| Sales| 10000| 1| + | Alex| Sales| 30000| 2| + | Evan| Sales| 32000| 3| + | Fred|Engineering| 21000| 1| + | Tom|Engineering| 23000| 2| + |Chloe|Engineering| 23000| 2| + | Paul|Engineering| 29000| 4| + |Helen| Marketing| 29000| 1| + | Jane| Marketing| 29000| 1| + | Jeff| Marketing| 35000| 3| + +-----+-----------+------+----+ + +SELECT name, dept, DENSE_RANK() OVER (PARTITION BY dept ORDER BY salary ROWS BETWEEN + UNBOUNDED PRECEDING AND CURRENT ROW) AS dense_rank FROM employees; + +-----+-----------+------+----------+ + | name| dept|salary|dense_rank| + +-----+-----------+------+----------+ + | Lisa| Sales| 10000| 1| + | Alex| Sales| 30000| 2| + | Evan| Sales| 32000| 3| + | Fred|Engineering| 21000| 1| + | Tom|Engineering| 23000| 2| + |Chloe|Engineering| 23000| 2| + | Paul|Engineering| 29000| 3| + |Helen| Marketing| 29000| 1| + | Jane| Marketing| 29000| 1| + | Jeff| Marketing| 35000| 2| + +-----+-----------+------+----------+ + +SELECT name, dept, age, CUME_DIST() OVER (PARTITION BY dept ORDER BY age + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cume_dist FROM employees; + +-----+-----------+------+------------------+ + | name| dept|age | cume_dist| + +-----+-----------+------+------------------+ + | Alex| Sales| 33|0.3333333333333333| + | Lisa| Sales| 35|0.6666666666666666| + | Evan| Sales| 38| 1.0| + | Paul|Engineering| 23| 0.25| + |Chloe|Engineering| 25| 0.75| + | Fred|Engineering| 28| 0.25| + | Tom|Engineering| 33| 1.0| + | Jane| Marketing| 28|0.3333333333333333| + | Jeff| Marketing| 38|0.6666666666666666| + |Helen| Marketing| 40| 1.0| + +-----+-----------+------+------------------+ + +SELECT name, dept, salary, MIN(salary) OVER (PARTITION BY dept ORDER BY salary) AS min + FROM employees; + +-----+-----------+------+-----+ + | name| dept|salary| min| + +-----+-----------+------+-----+ + | Lisa| Sales| 10000|10000| + | Alex| Sales| 30000|10000| + | Evan| Sales| 32000|10000| + |Helen| Marketing| 29000|29000| + | Jane| Marketing| 29000|29000| + | Jeff| Marketing| 35000|29000| + | Fred|Engineering| 21000|21000| + | Tom|Engineering| 23000|21000| + |Chloe|Engineering| 23000|21000| + | Paul|Engineering| 29000|21000| + +-----+-----------+------+-----+ + +SELECT name, salary, + LAG(salary) OVER (PARTITION BY dept ORDER BY salary) AS lag, + LEAD(salary, 1, 0) OVER (PARTITION BY dept ORDER BY salary) AS lead + FROM employees; + +-----+-----------+------+-----+-----+ + | name| dept|salary| lag| lead| + +-----+-----------+------+-----+-----+ + | Lisa| Sales| 10000|NULL |30000| + | Alex| Sales| 30000|10000|32000| + | Evan| Sales| 32000|30000| 0| + | Fred|Engineering| 21000| NULL|23000| + |Chloe|Engineering| 23000|21000|23000| + | Tom|Engineering| 23000|23000|29000| + | Paul|Engineering| 29000|23000| 0| + |Helen| Marketing| 29000| NULL|29000| + | Jane| Marketing| 29000|29000|35000| + | Jeff| Marketing| 35000|29000| 0| + +-----+-----------+------+-----+-----+ +{% endhighlight %} + +### Related Statements + + * [SELECT](sql-ref-syntax-qry-select.html) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org