This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new eb5aa22026 fix: make `ntile` work in some corner cases (#8371)
eb5aa22026 is described below

commit eb5aa220261e9f5ec3e4cce3098f6de92f820785
Author: Huaijin <[email protected]>
AuthorDate: Sat Dec 2 03:53:58 2023 +0800

    fix: make `ntile` work in some corner cases (#8371)
    
    * fix: make ntile work in some corner cases
    
    * fix comments
    
    * minor
    
    * Update datafusion/sqllogictest/test_files/window.slt
    
    Co-authored-by: Mustafa Akur 
<[email protected]>
    
    ---------
    
    Co-authored-by: Mustafa Akur 
<[email protected]>
---
 datafusion/expr/src/window_function.rs        |  15 ++-
 datafusion/physical-expr/src/window/ntile.rs  |   3 +-
 datafusion/physical-plan/src/windows/mod.rs   |  29 +++--
 datafusion/sqllogictest/test_files/window.slt | 146 ++++++++++++++++++++++++++
 4 files changed, 182 insertions(+), 11 deletions(-)

diff --git a/datafusion/expr/src/window_function.rs 
b/datafusion/expr/src/window_function.rs
index 946a80dd84..610f1ecaea 100644
--- a/datafusion/expr/src/window_function.rs
+++ b/datafusion/expr/src/window_function.rs
@@ -268,7 +268,20 @@ impl BuiltInWindowFunction {
             BuiltInWindowFunction::FirstValue | 
BuiltInWindowFunction::LastValue => {
                 Signature::any(1, Volatility::Immutable)
             }
-            BuiltInWindowFunction::Ntile => Signature::any(1, 
Volatility::Immutable),
+            BuiltInWindowFunction::Ntile => Signature::uniform(
+                1,
+                vec![
+                    DataType::UInt64,
+                    DataType::UInt32,
+                    DataType::UInt16,
+                    DataType::UInt8,
+                    DataType::Int64,
+                    DataType::Int32,
+                    DataType::Int16,
+                    DataType::Int8,
+                ],
+                Volatility::Immutable,
+            ),
             BuiltInWindowFunction::NthValue => Signature::any(2, 
Volatility::Immutable),
         }
     }
diff --git a/datafusion/physical-expr/src/window/ntile.rs 
b/datafusion/physical-expr/src/window/ntile.rs
index 49aac0877a..f5442e1b0f 100644
--- a/datafusion/physical-expr/src/window/ntile.rs
+++ b/datafusion/physical-expr/src/window/ntile.rs
@@ -96,8 +96,9 @@ impl PartitionEvaluator for NtileEvaluator {
     ) -> Result<ArrayRef> {
         let num_rows = num_rows as u64;
         let mut vec: Vec<u64> = Vec::new();
+        let n = u64::min(self.n, num_rows);
         for i in 0..num_rows {
-            let res = i * self.n / num_rows;
+            let res = i * n / num_rows;
             vec.push(res + 1)
         }
         Ok(Arc::new(UInt64Array::from(vec)))
diff --git a/datafusion/physical-plan/src/windows/mod.rs 
b/datafusion/physical-plan/src/windows/mod.rs
index d97e3c93a1..828dcb4b13 100644
--- a/datafusion/physical-plan/src/windows/mod.rs
+++ b/datafusion/physical-plan/src/windows/mod.rs
@@ -189,15 +189,26 @@ fn create_built_in_window_expr(
         BuiltInWindowFunction::PercentRank => Arc::new(percent_rank(name)),
         BuiltInWindowFunction::CumeDist => Arc::new(cume_dist(name)),
         BuiltInWindowFunction::Ntile => {
-            let n: i64 = get_scalar_value_from_args(args, 0)?
-                .ok_or_else(|| {
-                    DataFusionError::Execution(
-                        "NTILE requires at least 1 argument".to_string(),
-                    )
-                })?
-                .try_into()?;
-            let n: u64 = n as u64;
-            Arc::new(Ntile::new(name, n))
+            let n = get_scalar_value_from_args(args, 0)?.ok_or_else(|| {
+                DataFusionError::Execution(
+                    "NTILE requires a positive integer".to_string(),
+                )
+            })?;
+
+            if n.is_null() {
+                return exec_err!("NTILE requires a positive integer, but finds 
NULL");
+            }
+
+            if n.is_unsigned() {
+                let n: u64 = n.try_into()?;
+                Arc::new(Ntile::new(name, n))
+            } else {
+                let n: i64 = n.try_into()?;
+                if n <= 0 {
+                    return exec_err!("NTILE requires a positive integer");
+                }
+                Arc::new(Ntile::new(name, n as u64))
+            }
         }
         BuiltInWindowFunction::Lag => {
             let arg = args[0].clone();
diff --git a/datafusion/sqllogictest/test_files/window.slt 
b/datafusion/sqllogictest/test_files/window.slt
index b2491478d8..bb6ca11948 100644
--- a/datafusion/sqllogictest/test_files/window.slt
+++ b/datafusion/sqllogictest/test_files/window.slt
@@ -3581,3 +3581,149 @@ CREATE TABLE new_table AS SELECT NTILE(2) OVER(ORDER BY 
c1) AS ntile_2 FROM aggr
 
 statement ok
 DROP TABLE new_table;
+
+statement ok
+CREATE TABLE t1 (a int) AS VALUES (1), (2), (3);
+
+query I
+SELECT NTILE(9223377) OVER(ORDER BY a) FROM t1;
+----
+1
+2
+3
+
+query I
+SELECT NTILE(9223372036854775809) OVER(ORDER BY a) FROM t1;
+----
+1
+2
+3
+
+query error DataFusion error: Execution error: NTILE requires a positive 
integer
+SELECT NTILE(-922337203685477580) OVER(ORDER BY a) FROM t1;
+
+query error DataFusion error: Execution error: Table 't' doesn't exist\.
+DROP TABLE t;
+
+# NTILE with PARTITION BY, those tests from duckdb: 
https://github.com/duckdb/duckdb/blob/main/test/sql/window/test_ntile.test
+statement ok
+CREATE TABLE score_board (team_name VARCHAR, player VARCHAR, score INTEGER) as 
VALUES
+          ('Mongrels', 'Apu', 350),
+          ('Mongrels', 'Ned', 666),
+          ('Mongrels', 'Meg', 1030),
+          ('Mongrels', 'Burns', 1270),
+          ('Simpsons', 'Homer', 1),
+          ('Simpsons', 'Lisa', 710),
+          ('Simpsons', 'Marge', 990),
+          ('Simpsons', 'Bart', 2010)
+
+query TTII
+SELECT
+  team_name,
+  player,
+  score,
+  NTILE(2) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+ORDER BY team_name, score;
+----
+Mongrels       Apu     350     1
+Mongrels       Ned     666     1
+Mongrels       Meg     1030    2
+Mongrels       Burns   1270    2
+Simpsons       Homer   1       1
+Simpsons       Lisa    710     1
+Simpsons       Marge   990     2
+Simpsons       Bart    2010    2
+
+query TTII
+SELECT
+  team_name,
+  player,
+  score,
+  NTILE(2) OVER (ORDER BY score ASC) AS NTILE
+FROM score_board s
+ORDER BY score;
+----
+Simpsons       Homer   1       1
+Mongrels       Apu     350     1
+Mongrels       Ned     666     1
+Simpsons       Lisa    710     1
+Simpsons       Marge   990     2
+Mongrels       Meg     1030    2
+Mongrels       Burns   1270    2
+Simpsons       Bart    2010    2
+
+query TTII
+SELECT
+  team_name,
+  player,
+  score,
+  NTILE(1000) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+ORDER BY team_name, score;
+----
+Mongrels       Apu     350     1
+Mongrels       Ned     666     2
+Mongrels       Meg     1030    3
+Mongrels       Burns   1270    4
+Simpsons       Homer   1       1
+Simpsons       Lisa    710     2
+Simpsons       Marge   990     3
+Simpsons       Bart    2010    4
+
+query TTII
+SELECT
+  team_name,
+  player,
+  score,
+  NTILE(1) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+ORDER BY team_name, score;
+----
+Mongrels       Apu     350     1
+Mongrels       Ned     666     1
+Mongrels       Meg     1030    1
+Mongrels       Burns   1270    1
+Simpsons       Homer   1       1
+Simpsons       Lisa    710     1
+Simpsons       Marge   990     1
+Simpsons       Bart    2010    1
+
+# incorrect number of parameters for ntile
+query error DataFusion error: Execution error: NTILE requires a positive 
integer, but finds NULL
+SELECT
+  NTILE(NULL) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+query error DataFusion error: Execution error: NTILE requires a positive 
integer
+SELECT
+  NTILE(-1) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+query error DataFusion error: Execution error: NTILE requires a positive 
integer
+SELECT
+  NTILE(0) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+statement error
+SELECT
+  NTILE() OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+statement error
+SELECT
+  NTILE(1,2) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+statement error
+SELECT
+  NTILE(1,2,3) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+statement error
+SELECT
+  NTILE(1,2,3,4) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
+FROM score_board s
+
+statement ok
+DROP TABLE score_board;

Reply via email to