This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new c4562dc7fc [Minor]: support window functions in order by expressions
(#20963)
c4562dc7fc is described below
commit c4562dc7fc961618f3c4c30b4a1037276ee8ba68
Author: Burak Şen <[email protected]>
AuthorDate: Thu Mar 26 00:15:46 2026 +0300
[Minor]: support window functions in order by expressions (#20963)
## Which issue does this PR close?
- Closes #608.
## Rationale for this change
#608 has details but as @alamb describes in
https://github.com/apache/datafusion/issues/608#issuecomment-4034162591
```
SELECT c2
FROM test
ORDER BY max(c3) OVER (ORDER BY c9);
```
fails with:
```
This feature is not implemented: Physical plan does not support logical
expression WindowFunction(WindowFunction { fun: AggregateUDF(AggregateUDF {
inner: Max { signature: Signature { type_signature: UserDefined, volatility:
Immutable, parameter_names: None } } }), params: WindowFunctionParams { args:
[Column(Column { relation: Some(Bare { table: "test" }), name: "c3" })],
partition_by: [], order_by: [Sort { expr: Column(Column { relation: Some(Bare {
table: "test" }), name: "c9" }), [...]
```
## What changes are included in this PR?
- relevant change
- slt and unit tests
## Are these changes tested?
Yes added both unit tests for plan and slt tests for checking output
## Are there any user-facing changes?
Additive user changes that users can now use window in order by
expressions
---
datafusion/sql/src/select.rs | 40 ++++++--
datafusion/sql/tests/sql_integration.rs | 142 ++++++++++++++++++++++++++
datafusion/sqllogictest/test_files/window.slt | 50 +++++++++
3 files changed, 222 insertions(+), 10 deletions(-)
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index bb600a2a50..0e719916f2 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -257,7 +257,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
select_exprs: mut select_exprs_post_aggr,
having_expr: having_expr_post_aggr,
qualify_expr: qualify_expr_post_aggr,
- order_by_exprs: order_by_rex,
+ order_by_exprs: mut order_by_rex,
} = if !group_by_exprs.is_empty() || !aggr_exprs.is_empty() {
self.aggregate(
&base_plan,
@@ -293,14 +293,23 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
plan
};
- // The outer expressions we will search through for window functions.
- // Window functions may be sourced from the SELECT list or from the
QUALIFY expression.
- let windows_expr_haystack = select_exprs_post_aggr
- .iter()
- .chain(qualify_expr_post_aggr.iter());
+ // The window expressions from SELECT and QUALIFY only, used to
validate that
+ // QUALIFY is used with window functions (ORDER BY window functions
don't count).
+ let qualify_window_func_exprs = find_window_exprs(
+ select_exprs_post_aggr
+ .iter()
+ .chain(qualify_expr_post_aggr.iter()),
+ );
+
// All of the window expressions (deduplicated and rewritten to
reference aggregates as
- // columns from input).
- let window_func_exprs = find_window_exprs(windows_expr_haystack);
+ // columns from input). Window functions may be sourced from the
SELECT list, QUALIFY
+ // expression, or ORDER BY.
+ let window_func_exprs = find_window_exprs(
+ select_exprs_post_aggr
+ .iter()
+ .chain(qualify_expr_post_aggr.iter())
+ .chain(order_by_rex.iter().map(|s| &s.expr)),
+ );
// Process window functions after aggregation as they can reference
// aggregate functions in their body
@@ -315,14 +324,25 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
.map(|expr| rebase_expr(expr, &window_func_exprs, &plan))
.collect::<Result<Vec<Expr>>>()?;
+ order_by_rex = order_by_rex
+ .into_iter()
+ .map(|sort_expr| {
+ Ok(sort_expr.with_expr(rebase_expr(
+ &sort_expr.expr,
+ &window_func_exprs,
+ &plan,
+ )?))
+ })
+ .collect::<Result<Vec<_>>>()?;
+
plan
};
// Process QUALIFY clause after window functions
// QUALIFY filters the results of window functions, similar to how
HAVING filters aggregates
let plan = if let Some(qualify_expr) = qualify_expr_post_aggr {
- // Validate that QUALIFY is used with window functions
- if window_func_exprs.is_empty() {
+ // Validate that QUALIFY is used with window functions in SELECT
or QUALIFY
+ if qualify_window_func_exprs.is_empty() {
return plan_err!(
"QUALIFY clause requires window functions in the SELECT
list or QUALIFY clause"
);
diff --git a/datafusion/sql/tests/sql_integration.rs
b/datafusion/sql/tests/sql_integration.rs
index 29c17be69c..346f6929da 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -2805,6 +2805,138 @@ fn over_order_by_with_window_frame_double_end() {
);
}
+#[test]
+fn window_function_only_in_order_by() {
+ let sql = "SELECT order_id FROM orders ORDER BY MAX(qty) OVER (ORDER BY
order_id)";
+ let plan = logical_plan(sql).unwrap();
+ assert_snapshot!(
+ plan,
+ @r"
+ Projection: orders.order_id
+ Sort: max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ASC NULLS LAST
+ Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+ WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+ TableScan: orders
+ "
+ );
+}
+
+#[test]
+fn window_function_in_select_and_order_by() {
+ let sql = "SELECT order_id, MAX(qty) OVER (ORDER BY order_id) FROM orders
ORDER BY MAX(qty) OVER (ORDER BY order_id)";
+ let plan = logical_plan(sql).unwrap();
+ assert_snapshot!(
+ plan,
+ @r"
+ Sort: max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ASC NULLS LAST
+ Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+ WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id ASC
NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+ TableScan: orders
+ "
+ );
+}
+
+#[test]
+fn window_function_in_order_by_nested_expr() {
+ let sql =
+ "SELECT order_id FROM orders ORDER BY MAX(qty) OVER (ORDER BY
order_id) + 1";
+ let plan = logical_plan(sql).unwrap();
+ assert_snapshot!(
+ plan,
+ @r"
+ Projection: orders.order_id
+ Sort: max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + Int64(1) ASC NULLS LAST
+ Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+ WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+ TableScan: orders
+ "
+ );
+}
+
+#[test]
+fn window_function_in_order_by_desc() {
+ let sql =
+ "SELECT order_id FROM orders ORDER BY MAX(qty) OVER (ORDER BY
order_id) DESC";
+ let plan = logical_plan(sql).unwrap();
+ assert_snapshot!(
+ plan,
+ @r"
+ Projection: orders.order_id
+ Sort: max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW DESC NULLS FIRST
+ Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+ WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+ TableScan: orders
+ "
+ );
+}
+
+#[test]
+fn multiple_window_functions_in_order_by() {
+ let sql = "SELECT order_id FROM orders ORDER BY MAX(qty) OVER (ORDER BY
order_id), MIN(qty) OVER (ORDER BY order_id DESC)";
+ let plan = logical_plan(sql).unwrap();
+ assert_snapshot!(
+ plan,
+ @r"
+ Projection: orders.order_id
+ Sort: max(orders.qty) ORDER BY [orders.order_id ASC NULLS LAST] RANGE
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ASC NULLS LAST, min(orders.qty)
ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING
AND CURRENT ROW ASC NULLS LAST
+ Projection: orders.order_id, max(orders.qty) ORDER BY [orders.order_id
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW,
min(orders.qty) ORDER BY [orders.order_id DESC NULLS FIRST] RANGE BETWEEN
UNBOUNDED PRECEDING AND CURRENT ROW
+ WindowAggr: windowExpr=[[max(orders.qty) ORDER BY [orders.order_id
ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+ WindowAggr: windowExpr=[[min(orders.qty) ORDER BY [orders.order_id
DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+ TableScan: orders
+ "
+ );
+}
+
+#[test]
+fn window_function_in_order_by_with_group_by() {
+ let sql = "SELECT order_id, SUM(qty) FROM orders GROUP BY order_id ORDER
BY MAX(SUM(qty)) OVER (ORDER BY order_id)";
+ let plan = logical_plan(sql).unwrap();
+ assert_snapshot!(
+ plan,
+ @r"
+ Projection: orders.order_id, sum(orders.qty)
+ Sort: max(sum(orders.qty)) ORDER BY [orders.order_id ASC NULLS LAST]
RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ASC NULLS LAST
+ Projection: orders.order_id, sum(orders.qty), max(sum(orders.qty))
ORDER BY [orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND
CURRENT ROW
+ WindowAggr: windowExpr=[[max(sum(orders.qty)) ORDER BY
[orders.order_id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT
ROW]]
+ Aggregate: groupBy=[[orders.order_id]], aggr=[[sum(orders.qty)]]
+ TableScan: orders
+ "
+ );
+}
+
+#[test]
+fn window_function_in_order_by_with_qualify() {
+ let sql = "SELECT person.id, ROW_NUMBER() OVER (PARTITION BY person.age
ORDER BY person.id) as rn FROM person QUALIFY rn = 1 ORDER BY ROW_NUMBER() OVER
(PARTITION BY person.age ORDER BY person.id)";
+ let plan = logical_plan(sql).unwrap();
+ assert_snapshot!(
+ plan,
+ @r"
+ Sort: rn ASC NULLS LAST
+ Projection: person.id, row_number() PARTITION BY [person.age] ORDER BY
[person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS
rn
+ Filter: row_number() PARTITION BY [person.age] ORDER BY [person.id ASC
NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW = Int64(1)
+ WindowAggr: windowExpr=[[row_number() PARTITION BY [person.age]
ORDER BY [person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND
CURRENT ROW]]
+ TableScan: person
+ "
+ );
+}
+
+#[test]
+fn window_function_in_order_by_not_in_select() {
+ let sql =
+ "SELECT order_id FROM orders ORDER BY MIN(qty) OVER (PARTITION BY
order_id)";
+ let plan = logical_plan(sql).unwrap();
+ assert_snapshot!(
+ plan,
+ @r"
+ Projection: orders.order_id
+ Sort: min(orders.qty) PARTITION BY [orders.order_id] ROWS BETWEEN
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ASC NULLS LAST
+ Projection: orders.order_id, min(orders.qty) PARTITION BY
[orders.order_id] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ WindowAggr: windowExpr=[[min(orders.qty) PARTITION BY
[orders.order_id] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+ TableScan: orders
+ "
+ );
+}
+
#[test]
fn over_order_by_with_window_frame_single_end() {
let sql = "SELECT order_id, MAX(qty) OVER (ORDER BY order_id ROWS 3
PRECEDING), MIN(qty) OVER (ORDER BY order_id DESC) from orders";
@@ -4256,6 +4388,16 @@ fn test_select_qualify_without_window_function() {
);
}
+#[test]
+fn test_select_qualify_without_window_function_but_window_in_order_by() {
+ let sql = "SELECT person.id FROM person QUALIFY person.id > 1 ORDER BY
ROW_NUMBER() OVER (ORDER BY person.id)";
+ let err = logical_plan(sql).unwrap_err();
+ assert_eq!(
+ err.strip_backtrace(),
+ "Error during planning: QUALIFY clause requires window functions in
the SELECT list or QUALIFY clause"
+ );
+}
+
#[test]
fn test_select_qualify_complex_condition() {
let sql = "SELECT person.id, person.age, ROW_NUMBER() OVER (PARTITION BY
person.age ORDER BY person.id) as rn, RANK() OVER (ORDER BY person.salary) as
rank FROM person QUALIFY rn <= 2 AND rank <= 5";
diff --git a/datafusion/sqllogictest/test_files/window.slt
b/datafusion/sqllogictest/test_files/window.slt
index 7e8aa753f4..ae83045961 100644
--- a/datafusion/sqllogictest/test_files/window.slt
+++ b/datafusion/sqllogictest/test_files/window.slt
@@ -5664,6 +5664,56 @@ DROP TABLE new_table;
statement ok
DROP TABLE aggregate_test_100_utf8view;
+# Window function only in ORDER BY
+query I
+SELECT c2 FROM aggregate_test_100 ORDER BY row_number() OVER (ORDER BY c9)
LIMIT 5;
+----
+4
+2
+5
+2
+2
+
+# Window function in both SELECT and ORDER BY (deduplication)
+query II
+SELECT c2, row_number() OVER (ORDER BY c9) as rn FROM aggregate_test_100 ORDER
BY row_number() OVER (ORDER BY c9) LIMIT 5;
+----
+4 1
+2 2
+5 3
+2 4
+2 5
+
+# Nested expression: ORDER BY window_func(...) + 1
+query I
+SELECT c2 FROM aggregate_test_100 ORDER BY row_number() OVER (ORDER BY c9) + 1
LIMIT 5;
+----
+4
+2
+5
+2
+2
+
+# Multiple window functions in ORDER BY
+query I
+SELECT c2 FROM aggregate_test_100 ORDER BY row_number() OVER (ORDER BY c9),
max(c3) OVER (ORDER BY c9) LIMIT 5;
+----
+4
+2
+5
+2
+2
+
+# DESC ordering with window function
+query I
+SELECT c2 FROM aggregate_test_100 ORDER BY row_number() OVER (ORDER BY c9)
DESC LIMIT 5;
+----
+5
+1
+1
+2
+1
+
statement ok
DROP TABLE aggregate_test_100
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]