This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new eb5aa22026 fix: make `ntile` work in some corner cases (#8371)
eb5aa22026 is described below
commit eb5aa220261e9f5ec3e4cce3098f6de92f820785
Author: Huaijin <[email protected]>
AuthorDate: Sat Dec 2 03:53:58 2023 +0800
fix: make `ntile` work in some corner cases (#8371)
* fix: make ntile work in some corner cases
* fix comments
* minor
* Update datafusion/sqllogictest/test_files/window.slt
Co-authored-by: Mustafa Akur
<[email protected]>
---------
Co-authored-by: Mustafa Akur
<[email protected]>
---
datafusion/expr/src/window_function.rs | 15 ++-
datafusion/physical-expr/src/window/ntile.rs | 3 +-
datafusion/physical-plan/src/windows/mod.rs | 29 +++--
datafusion/sqllogictest/test_files/window.slt | 146 ++++++++++++++++++++++++++
4 files changed, 182 insertions(+), 11 deletions(-)
diff --git a/datafusion/expr/src/window_function.rs
b/datafusion/expr/src/window_function.rs
index 946a80dd84..610f1ecaea 100644
--- a/datafusion/expr/src/window_function.rs
+++ b/datafusion/expr/src/window_function.rs
@@ -268,7 +268,20 @@ impl BuiltInWindowFunction {
BuiltInWindowFunction::FirstValue |
BuiltInWindowFunction::LastValue => {
Signature::any(1, Volatility::Immutable)
}
- BuiltInWindowFunction::Ntile => Signature::any(1,
Volatility::Immutable),
+ BuiltInWindowFunction::Ntile => Signature::uniform(
+ 1,
+ vec![
+ DataType::UInt64,
+ DataType::UInt32,
+ DataType::UInt16,
+ DataType::UInt8,
+ DataType::Int64,
+ DataType::Int32,
+ DataType::Int16,
+ DataType::Int8,
+ ],
+ Volatility::Immutable,
+ ),
BuiltInWindowFunction::NthValue => Signature::any(2,
Volatility::Immutable),
}
}
diff --git a/datafusion/physical-expr/src/window/ntile.rs
b/datafusion/physical-expr/src/window/ntile.rs
index 49aac0877a..f5442e1b0f 100644
--- a/datafusion/physical-expr/src/window/ntile.rs
+++ b/datafusion/physical-expr/src/window/ntile.rs
@@ -96,8 +96,9 @@ impl PartitionEvaluator for NtileEvaluator {
) -> Result<ArrayRef> {
let num_rows = num_rows as u64;
let mut vec: Vec<u64> = Vec::new();
+ let n = u64::min(self.n, num_rows);
for i in 0..num_rows {
- let res = i * self.n / num_rows;
+ let res = i * n / num_rows;
vec.push(res + 1)
}
Ok(Arc::new(UInt64Array::from(vec)))
diff --git a/datafusion/physical-plan/src/windows/mod.rs
b/datafusion/physical-plan/src/windows/mod.rs
index d97e3c93a1..828dcb4b13 100644
--- a/datafusion/physical-plan/src/windows/mod.rs
+++ b/datafusion/physical-plan/src/windows/mod.rs
@@ -189,15 +189,26 @@ fn create_built_in_window_expr(
BuiltInWindowFunction::PercentRank => Arc::new(percent_rank(name)),
BuiltInWindowFunction::CumeDist => Arc::new(cume_dist(name)),
BuiltInWindowFunction::Ntile => {
- let n: i64 = get_scalar_value_from_args(args, 0)?
- .ok_or_else(|| {
- DataFusionError::Execution(
- "NTILE requires at least 1 argument".to_string(),
- )
- })?
- .try_into()?;
- let n: u64 = n as u64;
- Arc::new(Ntile::new(name, n))
+ let n = get_scalar_value_from_args(args, 0)?.ok_or_else(|| {
+ DataFusionError::Execution(
+ "NTILE requires a positive integer".to_string(),
+ )
+ })?;
+
+ if n.is_null() {
+ return exec_err!("NTILE requires a positive integer, but finds
NULL");
+ }
+
+ if n.is_unsigned() {
+ let n: u64 = n.try_into()?;
+ Arc::new(Ntile::new(name, n))
+ } else {
+ let n: i64 = n.try_into()?;
+ if n <= 0 {
+ return exec_err!("NTILE requires a positive integer");
+ }
+ Arc::new(Ntile::new(name, n as u64))
+ }
}
BuiltInWindowFunction::Lag => {
let arg = args[0].clone();
diff --git a/datafusion/sqllogictest/test_files/window.slt
b/datafusion/sqllogictest/test_files/window.slt
index b2491478d8..bb6ca11948 100644
--- a/datafusion/sqllogictest/test_files/window.slt
+++ b/datafusion/sqllogictest/test_files/window.slt
@@ -3581,3 +3581,149 @@ CREATE TABLE new_table AS SELECT NTILE(2) OVER(ORDER BY
c1) AS ntile_2 FROM aggr
statement ok
DROP TABLE new_table;
+
+statement ok
+CREATE TABLE t1 (a int) AS VALUES (1), (2), (3);
+
+query I
+SELECT NTILE(9223377) OVER(ORDER BY a) FROM t1;
+----
+1
+2
+3
+
+query I
+SELECT NTILE(9223372036854775809) OVER(ORDER BY a) FROM t1;
+----
+1
+2
+3
+
+query error DataFusion error: Execution error: NTILE requires a positive
integer
+SELECT NTILE(-922337203685477580) OVER(ORDER BY a) FROM t1;
+
+query error DataFusion error: Execution error: Table 't' doesn't exist\.
+DROP TABLE t;
+
+# NTILE with PARTITION BY, those tests from duckdb:
https://github.com/duckdb/duckdb/blob/main/test/sql/window/test_ntile.test
+statement ok
+CREATE TABLE score_board (team_name VARCHAR, player VARCHAR, score INTEGER) as
VALUES
+ ('Mongrels', 'Apu', 350),
+ ('Mongrels', 'Ned', 666),
+ ('Mongrels', 'Meg', 1030),
+ ('Mongrels', 'Burns', 1270),
+ ('Simpsons', 'Homer', 1),
+ ('Simpsons', 'Lisa', 710),
+ ('Simpsons', 'Marge', 990),
+ ('Simpsons', 'Bart', 2010)
+
+query TTII
+SELECT
+ team_name,
+ player,
+ score,
+ NTILE(2) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+ORDER BY team_name, score;
+----
+Mongrels Apu 350 1
+Mongrels Ned 666 1
+Mongrels Meg 1030 2
+Mongrels Burns 1270 2
+Simpsons Homer 1 1
+Simpsons Lisa 710 1
+Simpsons Marge 990 2
+Simpsons Bart 2010 2
+
+query TTII
+SELECT
+ team_name,
+ player,
+ score,
+ NTILE(2) OVER (ORDER BY score ASC) AS NTILE
+FROM score_board s
+ORDER BY score;
+----
+Simpsons Homer 1 1
+Mongrels Apu 350 1
+Mongrels Ned 666 1
+Simpsons Lisa 710 1
+Simpsons Marge 990 2
+Mongrels Meg 1030 2
+Mongrels Burns 1270 2
+Simpsons Bart 2010 2
+
+query TTII
+SELECT
+ team_name,
+ player,
+ score,
+ NTILE(1000) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+ORDER BY team_name, score;
+----
+Mongrels Apu 350 1
+Mongrels Ned 666 2
+Mongrels Meg 1030 3
+Mongrels Burns 1270 4
+Simpsons Homer 1 1
+Simpsons Lisa 710 2
+Simpsons Marge 990 3
+Simpsons Bart 2010 4
+
+query TTII
+SELECT
+ team_name,
+ player,
+ score,
+ NTILE(1) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+ORDER BY team_name, score;
+----
+Mongrels Apu 350 1
+Mongrels Ned 666 1
+Mongrels Meg 1030 1
+Mongrels Burns 1270 1
+Simpsons Homer 1 1
+Simpsons Lisa 710 1
+Simpsons Marge 990 1
+Simpsons Bart 2010 1
+
+# incorrect number of parameters for ntile
+query error DataFusion error: Execution error: NTILE requires a positive
integer, but finds NULL
+SELECT
+ NTILE(NULL) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+query error DataFusion error: Execution error: NTILE requires a positive
integer
+SELECT
+ NTILE(-1) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+query error DataFusion error: Execution error: NTILE requires a positive
integer
+SELECT
+ NTILE(0) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+statement error
+SELECT
+ NTILE() OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+statement error
+SELECT
+ NTILE(1,2) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+statement error
+SELECT
+ NTILE(1,2,3) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+statement error
+SELECT
+ NTILE(1,2,3,4) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+statement ok
+DROP TABLE score_board;