This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new c4562dc7fc [Minor]: support window functions in order by expressions 
(#20963)
c4562dc7fc is described below

commit c4562dc7fc961618f3c4c30b4a1037276ee8ba68
Author: Burak Şen <[email protected]>
AuthorDate: Thu Mar 26 00:15:46 2026 +0300

    [Minor]: support window functions in order by expressions (#20963)
    
    ## Which issue does this PR close?
    
    
    - Closes #608.
    
    ## Rationale for this change
    #608 has details but as @alamb describes in
    https://github.com/apache/datafusion/issues/608#issuecomment-4034162591
    ```
    SELECT c2
      FROM test
      ORDER BY max(c3) OVER (ORDER BY c9);
    ```
    
    fails with:
    
    ```
    This feature is not implemented: Physical plan does not support logical 
expression WindowFunction(WindowFunction { fun: AggregateUDF(AggregateUDF { 
inner: Max { signature: Signature { type_signature: UserDefined, volatility: 
Immutable, parameter_names: None } } }), params: WindowFunctionParams { args: 
[Column(Column { relation: Some(Bare { table: "test" }), name: "c3" })], 
partition_by: [], order_by: [Sort { expr: Column(Column { relation: Some(Bare { 
table: "test" }), name: "c9" }),  [...]
    ```
    ## What changes are included in this PR?
    - relevant change
    - slt and unit tests
    
    ## Are these changes tested?
    Yes added both unit tests for plan and slt tests for checking output
    
    ## Are there any user-facing changes?
    Additive user changes that users can now use window in order by
    expressions
---
 datafusion/sql/src/select.rs                  |  40 ++++++--
 datafusion/sql/tests/sql_integration.rs       | 142 ++++++++++++++++++++++++++
 datafusion/sqllogictest/test_files/window.slt |  50 +++++++++
 3 files changed, 222 insertions(+), 10 deletions(-)

diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index bb600a2a50..0e719916f2 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -257,7 +257,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             select_exprs: mut select_exprs_post_aggr,
             having_expr: having_expr_post_aggr,
             qualify_expr: qualify_expr_post_aggr,
-            order_by_exprs: order_by_rex,
+            order_by_exprs: mut order_by_rex,
         } = if !group_by_exprs.is_empty() || !aggr_exprs.is_empty() {
             self.aggregate(
                 &base_plan,
@@ -293,14 +293,23 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             plan
         };
 
-        // The outer expressions we will search through for window functions.
-        // Window functions may be sourced from the SELECT list or from the 
QUALIFY expression.
-        let windows_expr_haystack = select_exprs_post_aggr
-            .iter()
-            .chain(qualify_expr_post_aggr.iter());
+        // The window expressions from SELECT and QUALIFY only, used to 
validate that
+        // QUALIFY is used with window functions (ORDER BY window functions 
don't count).
+        let qualify_window_func_exprs = find_window_exprs(
+            select_exprs_post_aggr
+                .iter()
+                .chain(qualify_expr_post_aggr.iter()),
+        );
+
         // All of the window expressions (deduplicated and rewritten to 
reference aggregates as
-        // columns from input).
-        let window_func_exprs = find_window_exprs(windows_expr_haystack);
+        // columns from input). Window functions may be sourced from the 
SELECT list, QUALIFY
+        // expression, or ORDER BY.
+        let window_func_exprs = find_window_exprs(
+            select_exprs_post_aggr
+                .iter()
+                .chain(qualify_expr_post_aggr.iter())
+                .chain(order_by_rex.iter().map(|s| &s.expr)),
+        );
 
         // Process window functions after aggregation as they can reference
         // aggregate functions in their body
@@ -315,14 +324,25 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 .map(|expr| rebase_expr(expr, &window_func_exprs, &plan))
                 .collect::<Result<Vec<Expr>>>()?;
 
+            order_by_rex = order_by_rex
+                .into_iter()
+                .map(|sort_expr| {
+                    Ok(sort_expr.with_expr(rebase_expr(
+                        &sort_expr.expr,
+                        &window_func_exprs,
+                        &plan,
+                    )?))
+                })
+                .collect::<Result<Vec<_>>>()?;
+
             plan
         };
 
         // Process QUALIFY clause after window functions
         // QUALIFY filters the results of window functions, similar to how 
HAVING filters aggregates
         let plan = if let Some(qualify_expr) = qualify_expr_post_aggr {
-            // Validate that QUALIFY is used with window functions
-            if window_func_exprs.is_empty() {
+            // Validate that QUALIFY is used with window functions in SELECT 
or QUALIFY
+            if qualify_window_func_exprs.is_empty() {
                 return plan_err!(
                     "QUALIFY clause requires window functions in the SELECT 
list or QUALIFY clause"
                 );
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index 29c17be69c..346f6929da 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -2805,6 +2805,138 @@ fn over_order_by_with_window_frame_double_end() {
     );
 }
 
+#[test]
+fn window_function_only_in_order_by() {
+    let sql = "SELECT order_id FROM orders ORDER BY MAX(qty) OVER (ORDER BY 
order_id)";
+    let plan = logical_plan(sql).unwrap();
+    assert_snapshot!(
+        plan,
+        @r"
+    Projection: orders.order_id
+      Sort: max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ASC NULLS LAST
+        Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id 
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+          WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id 
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+            TableScan: orders
+    "
+    );
+}
+
+#[test]
+fn window_function_in_select_and_order_by() {
+    let sql = "SELECT order_id, MAX(qty) OVER (ORDER BY order_id) FROM orders 
ORDER BY MAX(qty) OVER (ORDER BY order_id)";
+    let plan = logical_plan(sql).unwrap();
+    assert_snapshot!(
+        plan,
+        @r"
+    Sort: max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ASC NULLS LAST
+      Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id 
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+        WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id ASC 
NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+          TableScan: orders
+    "
+    );
+}
+
+#[test]
+fn window_function_in_order_by_nested_expr() {
+    let sql =
+        "SELECT order_id FROM orders ORDER BY MAX(qty) OVER (ORDER BY 
order_id) + 1";
+    let plan = logical_plan(sql).unwrap();
+    assert_snapshot!(
+        plan,
+        @r"
+    Projection: orders.order_id
+      Sort: max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + Int64(1) ASC NULLS LAST
+        Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id 
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+          WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id 
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+            TableScan: orders
+    "
+    );
+}
+
+#[test]
+fn window_function_in_order_by_desc() {
+    let sql =
+        "SELECT order_id FROM orders ORDER BY MAX(qty) OVER (ORDER BY 
order_id) DESC";
+    let plan = logical_plan(sql).unwrap();
+    assert_snapshot!(
+        plan,
+        @r"
+    Projection: orders.order_id
+      Sort: max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW DESC NULLS FIRST
+        Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id 
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+          WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id 
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+            TableScan: orders
+    "
+    );
+}
+
+#[test]
+fn multiple_window_functions_in_order_by() {
+    let sql = "SELECT order_id FROM orders ORDER BY MAX(qty) OVER (ORDER BY 
order_id), MIN(qty) OVER (ORDER BY order_id DESC)";
+    let plan = logical_plan(sql).unwrap();
+    assert_snapshot!(
+        plan,
+        @r"
+    Projection: orders.order_id
+      Sort: max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ASC NULLS LAST, min(orders.qty) 
ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING 
AND CURRENT ROW ASC NULLS LAST
+        Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id 
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, 
min(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW
+          WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id 
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+            WindowAggr: windowExpr=[[min(orders.qty) ORDER BY [orders.order_id 
DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+              TableScan: orders
+    "
+    );
+}
+
+#[test]
+fn window_function_in_order_by_with_group_by() {
+    let sql = "SELECT order_id, SUM(qty) FROM orders GROUP BY order_id ORDER 
BY MAX(SUM(qty)) OVER (ORDER BY order_id)";
+    let plan = logical_plan(sql).unwrap();
+    assert_snapshot!(
+        plan,
+        @r"
+    Projection: orders.order_id, sum(orders.qty)
+      Sort: max(sum(orders.qty)) ORDER BY [orders.order_id ASC NULLS LAST] 
RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ASC NULLS LAST
+        Projection: orders.order_id, sum(orders.qty), max(sum(orders.qty)) 
ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW
+          WindowAggr: windowExpr=[[max(sum(orders.qty)) ORDER BY 
[orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT 
ROW]]
+            Aggregate: groupBy=[[orders.order_id]], aggr=[[sum(orders.qty)]]
+              TableScan: orders
+    "
+    );
+}
+
+#[test]
+fn window_function_in_order_by_with_qualify() {
+    let sql = "SELECT person.id, ROW_NUMBER() OVER (PARTITION BY person.age 
ORDER BY person.id) as rn FROM person QUALIFY rn = 1 ORDER BY ROW_NUMBER() OVER 
(PARTITION BY person.age ORDER BY person.id)";
+    let plan = logical_plan(sql).unwrap();
+    assert_snapshot!(
+        plan,
+        @r"
+    Sort: rn ASC NULLS LAST
+      Projection: person.id, row_number() PARTITION BY [person.age] ORDER BY 
[person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS 
rn
+        Filter: row_number() PARTITION BY [person.age] ORDER BY [person.id ASC 
NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW = Int64(1)
+          WindowAggr: windowExpr=[[row_number() PARTITION BY [person.age] 
ORDER BY [person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW]]
+            TableScan: person
+    "
+    );
+}
+
+#[test]
+fn window_function_in_order_by_not_in_select() {
+    let sql =
+        "SELECT order_id FROM orders ORDER BY MIN(qty) OVER (PARTITION BY 
order_id)";
+    let plan = logical_plan(sql).unwrap();
+    assert_snapshot!(
+        plan,
+        @r"
+    Projection: orders.order_id
+      Sort: min(orders.qty) PARTITION BY [orders.order_id] ROWS BETWEEN 
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ASC NULLS LAST
+        Projection: orders.order_id, min(orders.qty) PARTITION BY 
[orders.order_id] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+          WindowAggr: windowExpr=[[min(orders.qty) PARTITION BY 
[orders.order_id] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+            TableScan: orders
+    "
+    );
+}
+
 #[test]
 fn over_order_by_with_window_frame_single_end() {
     let sql = "SELECT order_id, MAX(qty) OVER (ORDER BY order_id ROWS 3 
PRECEDING), MIN(qty) OVER (ORDER BY order_id DESC) from orders";
@@ -4256,6 +4388,16 @@ fn test_select_qualify_without_window_function() {
     );
 }
 
+#[test]
+fn test_select_qualify_without_window_function_but_window_in_order_by() {
+    let sql = "SELECT person.id FROM person QUALIFY person.id > 1 ORDER BY 
ROW_NUMBER() OVER (ORDER BY person.id)";
+    let err = logical_plan(sql).unwrap_err();
+    assert_eq!(
+        err.strip_backtrace(),
+        "Error during planning: QUALIFY clause requires window functions in 
the SELECT list or QUALIFY clause"
+    );
+}
+
 #[test]
 fn test_select_qualify_complex_condition() {
     let sql = "SELECT person.id, person.age, ROW_NUMBER() OVER (PARTITION BY 
person.age ORDER BY person.id) as rn, RANK() OVER (ORDER BY person.salary) as 
rank FROM person QUALIFY rn <= 2 AND rank <= 5";
diff --git a/datafusion/sqllogictest/test_files/window.slt 
b/datafusion/sqllogictest/test_files/window.slt
index 7e8aa753f4..ae83045961 100644
--- a/datafusion/sqllogictest/test_files/window.slt
+++ b/datafusion/sqllogictest/test_files/window.slt
@@ -5664,6 +5664,56 @@ DROP TABLE new_table;
 statement ok
 DROP TABLE aggregate_test_100_utf8view;
 
+# Window function only in ORDER BY
+query I
+SELECT c2 FROM aggregate_test_100 ORDER BY row_number() OVER (ORDER BY c9) 
LIMIT 5;
+----
+4
+2
+5
+2
+2
+
+# Window function in both SELECT and ORDER BY (deduplication)
+query II
+SELECT c2, row_number() OVER (ORDER BY c9) as rn FROM aggregate_test_100 ORDER 
BY row_number() OVER (ORDER BY c9) LIMIT 5;
+----
+4 1
+2 2
+5 3
+2 4
+2 5
+
+# Nested expression: ORDER BY window_func(...) + 1
+query I
+SELECT c2 FROM aggregate_test_100 ORDER BY row_number() OVER (ORDER BY c9) + 1 
LIMIT 5;
+----
+4
+2
+5
+2
+2
+
+# Multiple window functions in ORDER BY
+query I
+SELECT c2 FROM aggregate_test_100 ORDER BY row_number() OVER (ORDER BY c9), 
max(c3) OVER (ORDER BY c9) LIMIT 5;
+----
+4
+2
+5
+2
+2
+
+# DESC ordering with window function
+query I
+SELECT c2 FROM aggregate_test_100 ORDER BY row_number() OVER (ORDER BY c9) 
DESC LIMIT 5;
+----
+5
+1
+1
+2
+1
+
 statement ok
 DROP TABLE aggregate_test_100
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to