AngersZhuuuu removed a comment on pull request #31402:
URL: https://github.com/apache/spark/pull/31402#issuecomment-770690539
PostgresSQL
```
postgres=# create table testData(val int, val_long bigint, val_double double
precision, val_date date, val_timestamp timestamp, cate text);
CREATE TABLE
postgres=# insert into testData values (null, 1, 1.0, '2017-08-01',
to_timestamp(1501545600), 'a');
INSERT 0 1
postgres=# insert into testData values (1, 1, 1.0, '2017-08-01',
to_timestamp(1501545600), 'a');
INSERT 0 1
postgres=# insert into testData values (1, 2, 2.5, '2017-08-02',
to_timestamp(1502000000), 'a');
INSERT 0 1
postgres=# insert into testData values (2, 2147483650, 100.001,
'2020-12-31', to_timestamp(1609372800), 'a');
INSERT 0 1
postgres=# insert into testData values (1, null, 1.0, '2017-08-01',
to_timestamp(1501545600), 'b');
INSERT 0 1
postgres=# insert into testData values (2, 3, 3.3, '2017-08-03',
to_timestamp(1503000000), 'b');
INSERT 0 1
postgres=# insert into testData values (3, 2147483650, 100.001,
'2020-12-31', to_timestamp(1609372800), 'b');
INSERT 0 1
postgres=# insert into testData values (null, null, null, null, null, null);
INSERT 0 1
postgres=# insert into testData values (3, 1, 1.0, '2017-08-01',
to_timestamp(1501545600), null);
INSERT 0 1
postgres=#
postgres=#
postgres=#
postgres=# SELECT val, cate,
postgres-# rank() OVER w AS rank,
postgres-# dense_rank() OVER w AS dense_rank,
postgres-# cume_dist() OVER w AS cume_dist,
postgres-# percent_rank() OVER w AS percent_rank,
postgres-# ntile(2) OVER w AS ntile,
postgres-# row_number() OVER w AS row_number,
postgres-# var_pop(val) OVER w AS var_pop,
postgres-# var_samp(val) OVER w AS var_samp,
postgres-# covar_pop(val, val_long) OVER w AS covar_pop,
postgres-# corr(val, val_long) OVER w AS corr,
postgres-# stddev_samp(val) OVER w AS stddev_samp,
postgres-# stddev_pop(val) OVER w AS stddev_pop
postgres-# FROM testData
postgres-# WINDOW w AS (PARTITION BY cate ORDER BY val ASC nulls first)
postgres-# ORDER BY cate nulls first, val nulls first;
val | cate | rank | dense_rank | cume_dist | percent_rank |
ntile | row_number | var_pop | var_samp |
covar_pop | corr | stddev_samp | stddev_pop
-----+------+------+------------+--------------------+--------------------+-------+------------+------------------------+------------------------+-------------------+------+------------------------+------------------------
| | 1 | 1 | 0.5 | 0 |
1 | 1 | | |
| | |
3 | | 2 | 2 | 1 | 1 |
2 | 2 | 0 | |
0 | | | 0
| a | 1 | 1 | 0.25 | 0 |
1 | 1 | | |
| | |
1 | a | 2 | 2 | 0.75 | 0.3333333333333333 |
2 | 3 | 0 | 0 |
0 | | 0 | 0
1 | a | 2 | 2 | 0.75 | 0.3333333333333333 |
1 | 2 | 0 | 0 |
0 | | 0 | 0
2 | a | 4 | 3 | 1 | 1 |
2 | 4 | 0.22222222222222222222 | 0.33333333333333333333 |
477218588.5555555 | 1 | 0.57735026918962576451 | 0.47140452079103168293
1 | b | 1 | 1 | 0.3333333333333333 | 0 |
1 | 1 | 0 | |
| | | 0
2 | b | 2 | 2 | 0.6666666666666666 | 0.5 |
1 | 2 | 0.25000000000000000000 | 0.50000000000000000000 |
0 | | 0.70710678118654752440 | 0.50000000000000000000
3 | b | 3 | 3 | 1 | 1 |
2 | 3 | 0.66666666666666666667 | 1.00000000000000000000 |
536870911.75 | 1 | 1.00000000000000000000 | 0.81649658092772603273
(9 rows)
postgres=#
```
Spark:
With [this
change](https://github.com/apache/spark/pull/31402#discussion_r567518596):
```
SELECT val, cate,
rank() OVER w AS rank,
dense_rank() OVER w AS dense_rank,
cume_dist() OVER w AS cume_dist,
percent_rank() OVER w AS percent_rank,
ntile(2) OVER w AS ntile,
row_number() OVER w AS row_number,
var_pop(val) OVER w AS var_pop,
var_samp(val) OVER w AS var_samp,
covar_pop(val, val_long) OVER w AS covar_pop,
corr(val, val_long) OVER w AS corr,
stddev_samp(val) OVER w AS stddev_samp,
stddev_pop(val) OVER w AS stddev_pop
FROM testData
WINDOW w AS (PARTITION BY cate ORDER BY val ASC nulls first)
ORDER BY cate nulls first, val nulls first
-- !query schema
struct<val:int,cate:string,rank:int,dense_rank:int,cume_dist:double,percent_rank:double,ntile:int,row_number:int,var_pop:double,var_samp:double,covar_pop:double,corr:double,stddev_samp:double,stddev_pop:double>
-- !query output
NULL NULL 1 1 0.5 0.0 1 1 NULL NULL
NULL NULL NULL NULL
3 NULL 2 2 1.0 1.0 2 2 0.0 NULL
0.0 NULL NULL 0.0
NULL a 1 1 0.25 0.0 1 1 NULL NULL
NULL NULL NULL NULL
1 a 2 2 0.75 0.3333333333333333 1 2
0.0 0.0 0.0 NULL 0.0 0.0
1 a 2 2 0.75 0.3333333333333333 2 3
0.0 0.0 0.0 NULL 0.0 0.0
2 a 4 3 1.0 1.0 2 4
0.22222222222222224 0.33333333333333337 4.772185885555555E8 1.0
0.5773502691896258 0.4714045207910317
1 b 1 1 0.3333333333333333 0.0 1 1
0.0 NULL NULL NULL NULL 0.0
2 b 2 2 0.6666666666666666 0.5 1 2
0.25 0.5 0.0 NULL 0.7071067811865476 0.5
3 b 3 3 1.0 1.0 2 3
0.6666666666666666 1.0 5.3687091175E8 1.0 1.0
0.816496580927726
```
Without [this
change](https://github.com/apache/spark/pull/31402#discussion_r567518596):
```
-- !query
SELECT val, cate,
rank() OVER w AS rank,
dense_rank() OVER w AS dense_rank,
cume_dist() OVER w AS cume_dist,
percent_rank() OVER w AS percent_rank,
ntile(2) OVER w AS ntile,
row_number() OVER w AS row_number,
var_pop(val) OVER w AS var_pop,
var_samp(val) OVER w AS var_samp,
covar_pop(val, val_long) OVER w AS covar_pop,
corr(val, val_long) OVER w AS corr,
stddev_samp(val) OVER w AS stddev_samp,
stddev_pop(val) OVER w AS stddev_pop
FROM testData
WINDOW w AS (PARTITION BY cate ORDER BY val ASC nulls first)
ORDER BY cate nulls first, val nulls first
-- !query schema
struct<val:int,cate:string,rank:int,dense_rank:int,cume_dist:double,percent_rank:double,ntile:int,row_number:int,var_pop:double,var_samp:double,covar_pop:double,corr:double,stddev_samp:double,stddev_pop:double>
-- !query output
NULL NULL 1 1 0.5 0.0 1 1 NULL NULL
NULL NULL NULL NULL
3 NULL 2 2 1.0 1.0 2 2 0.0 NULL
0.0 NULL NULL 0.0
NULL a 1 1 0.25 0.0 1 1 NULL NULL
NULL NULL NULL NULL
1 a 2 2 0.75 0.3333333333333333 2 3
0.0 0.0 0.0 NULL 0.0 0.0
1 a 2 2 0.75 0.3333333333333333 2 3
0.0 0.0 0.0 NULL 0.0 0.0
2 a 4 3 1.0 1.0 2 4
0.22222222222222224 0.33333333333333337 4.772185885555555E8 1.0
0.5773502691896258 0.4714045207910317
1 b 1 1 0.3333333333333333 0.0 1 1
0.0 NULL NULL NULL NULL 0.0
2 b 2 2 0.6666666666666666 0.5 1 2
0.25 0.5 0.0 NULL 0.7071067811865476 0.5
3 b 3 3 1.0 1.0 2 3
0.6666666666666666 1.0 5.3687091175E8 1.0 1.0
0.816496580927726
```
We can see that RankLike WindowFunction need default frame.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]