alamb commented on code in PR #17684:
URL: https://github.com/apache/datafusion/pull/17684#discussion_r2372892813
##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -371,17 +371,41 @@ pub(crate) fn window_equivalence_properties(
for (i, expr) in window_exprs.iter().enumerate() {
let partitioning_exprs = expr.partition_by();
let no_partitioning = partitioning_exprs.is_empty();
- // Collect columns defining partitioning, and construct all
`SortOptions`
- // variations for them. Then, we will check each one whether it
satisfies
- // the existing ordering provided by the input plan.
+
+ // Find "one" valid ordering for partition columns to avoid
exponential complexity.
Review Comment:
Context might help too
```suggestion
// Find "one" valid ordering for partition columns to avoid
exponential complexity.
// see https://github.com/apache/datafusion/issues/17401
```
##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -371,17 +371,41 @@ pub(crate) fn window_equivalence_properties(
for (i, expr) in window_exprs.iter().enumerate() {
let partitioning_exprs = expr.partition_by();
let no_partitioning = partitioning_exprs.is_empty();
- // Collect columns defining partitioning, and construct all
`SortOptions`
- // variations for them. Then, we will check each one whether it
satisfies
- // the existing ordering provided by the input plan.
+
+ // Find "one" valid ordering for partition columns to avoid
exponential complexity.
let mut all_satisfied_lexs = vec![];
- for lex in partitioning_exprs
- .iter()
- .map(|pb_order|
sort_options_resolving_constant(Arc::clone(pb_order)))
- .multi_cartesian_product()
- .filter_map(LexOrdering::new)
+ let mut candidate_ordering = vec![];
+
+ for partition_expr in partitioning_exprs.iter() {
+ let sort_options =
+ sort_options_resolving_constant(Arc::clone(partition_expr),
true);
+
+ // Try each sort option and pick the first one that works
+ let mut found = false;
+ for sort_expr in sort_options.iter() {
+ candidate_ordering.push(sort_expr.clone());
+ if let Some(lex) =
LexOrdering::new(candidate_ordering.clone()) {
Review Comment:
Minor: you can avoid some clones I think like
```suggestion
for sort_expr in sort_options.into_iter() {
candidate_ordering.push(sort_expr);
if let Some(lex) =
LexOrdering::new(candidate_ordering.clone()) {
```
##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -467,23 +493,44 @@ pub(crate) fn window_equivalence_properties(
// utilize set-monotonicity since the set shrinks as the frame
// boundary starts "touching" the end of the table.
else if frame.is_causal() {
- let args_all_lexs = sliding_expr
- .get_aggregate_expr()
- .expressions()
- .into_iter()
- .map(sort_options_resolving_constant)
- .multi_cartesian_product();
-
- let (mut asc, mut satisfied) = (false, false);
- for order in args_all_lexs {
- if let Some(f) = order.first() {
- asc = !f.options.descending;
+ // Find one valid ordering for aggregate arguments instead
of
+ // checking all combinations
+ let aggregate_exprs =
sliding_expr.get_aggregate_expr().expressions();
+ let mut candidate_order = vec![];
+ let mut asc = false;
+
+ for (idx, expr) in aggregate_exprs.iter().enumerate() {
+ let mut found = false;
+ let sort_options =
+ sort_options_resolving_constant(Arc::clone(expr),
false);
+
+ // Try each option and pick the first that works
+ for sort_expr in sort_options.iter() {
Review Comment:
Similarly here you could potentially use `sort_options.into_iter()` and save
the clone below. Here is what I used
```diff
@@ -505,13 +505,14 @@ pub(crate) fn window_equivalence_properties(
sort_options_resolving_constant(Arc::clone(expr), false);
// Try each option and pick the first that works
- for sort_expr in sort_options.iter() {
- candidate_order.push(sort_expr.clone());
+ for sort_expr in sort_options.into_iter() {
+ let is_asc = !sort_expr.options.descending;
+ candidate_order.push(sort_expr);
if let Some(lex) =
LexOrdering::new(candidate_order.clone()) {
if
window_eq_properties.ordering_satisfy(lex)? {
if idx == 0 {
- asc = !sort_expr.options.descending;
+ asc = is_asc;
}
found = true;
break;
```
##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -467,23 +493,44 @@ pub(crate) fn window_equivalence_properties(
// utilize set-monotonicity since the set shrinks as the frame
// boundary starts "touching" the end of the table.
else if frame.is_causal() {
- let args_all_lexs = sliding_expr
- .get_aggregate_expr()
- .expressions()
- .into_iter()
- .map(sort_options_resolving_constant)
- .multi_cartesian_product();
-
- let (mut asc, mut satisfied) = (false, false);
- for order in args_all_lexs {
- if let Some(f) = order.first() {
- asc = !f.options.descending;
+ // Find one valid ordering for aggregate arguments instead
of
+ // checking all combinations
+ let aggregate_exprs =
sliding_expr.get_aggregate_expr().expressions();
Review Comment:
FWIW this seems very similar to the loop above, I wonder if there is some
way (as a follow on PR) to factor it out to reduce the replication
##########
datafusion/sqllogictest/test_files/window.slt:
##########
@@ -6034,3 +6034,92 @@ LIMIT 5
0 2 NULL NULL 0 NULL NULL
0 3 NULL NULL 0 NULL NULL
0 4 NULL NULL 0 NULL NULL
+
+# regression test for https://github.com/apache/datafusion/issues/17401
Review Comment:
I ran this locally, and found
both main and This branch -- took 2 seconds
Main
```shell
(venv) andrewlamb@Andrews-MacBook-Pro-3:~/Software/datafusion$ cargo test
--profile=ci --test sqllogictests -- window.slt
Finished `ci` profile [unoptimized + debuginfo] target(s) in 0.20s
Running bin/sqllogictests.rs
(target/ci/deps/sqllogictests-4dcb99f83e94c047)
Completed 2 test files in 2 seconds
```
This branch
```
(venv) andrewlamb@Andrews-MacBook-Pro-3:~/Software/datafusion$ cargo test
--profile=ci --test sqllogictests -- window.slt
Finished `ci` profile [unoptimized + debuginfo] target(s) in 0.19s
Running bin/sqllogictests.rs
(target/ci/deps/sqllogictests-fbba93e4275b7826)
Completed 2 test files in 2 seconds
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]