mingmwang commented on PR #5462:
URL:
https://github.com/apache/arrow-datafusion/pull/5462#issuecomment-1457398702
You need to normalize the sort exprs to the same sort options before doing the
dedup. I remember there is similar dedup logic for the `Sort` exprs in `Window`
functions.
```rust
/// Builds a de-duplicated list of sort keys from `partition_by` and `order_by`.
///
/// Keys derived from `partition_by` come first, each paired with `true`;
/// any `order_by` key not already present follows, paired with `false`.
/// When a partition expression matches an `order_by` expression (ignoring the
/// sort options of the latter), the original `order_by` key — with its own
/// sort options — is used in place of the default-ordered partition key.
///
/// # Errors
///
/// Returns `DataFusionError::Plan` if any element of `order_by` is not an
/// `Expr::Sort`.
pub fn generate_sort_key(
    partition_by: &[Expr],
    order_by: &[Expr],
) -> Result<WindowSortKey> {
    // Rewrite every ORDER BY key to the same sort options (asc = true,
    // nulls_first = false) so it can be compared against the partition keys
    // regardless of the options the caller supplied.
    let normalized_order_by_keys = order_by
        .iter()
        .map(|e| match e {
            Expr::Sort(Sort { expr, .. }) => {
                Ok(Expr::Sort(Sort::new(expr.clone(), true, false)))
            }
            _ => Err(DataFusionError::Plan(
                "Order by only accepts sort expressions".to_string(),
            )),
        })
        .collect::<Result<Vec<_>>>()?;

    // Parallel vectors: `is_partition_flag[i]` records whether
    // `final_sort_keys[i]` originated from `partition_by`.
    let mut final_sort_keys = vec![];
    let mut is_partition_flag = vec![];

    for e in partition_by {
        // By default, create sort key with ASC is true and NULLS LAST to be
        // consistent with PostgreSQL's rule:
        // https://www.postgresql.org/docs/current/queries-order.html
        let e = e.clone().sort(true, false);
        if let Some(pos) = normalized_order_by_keys.iter().position(|key| key == &e) {
            // Same underlying expression appears in ORDER BY: keep the
            // caller's original key (and its sort options) instead.
            let order_by_key = &order_by[pos];
            if !final_sort_keys.contains(order_by_key) {
                final_sort_keys.push(order_by_key.clone());
                is_partition_flag.push(true);
            }
        } else if !final_sort_keys.contains(&e) {
            final_sort_keys.push(e);
            is_partition_flag.push(true);
        }
    }

    // Append the remaining ORDER BY keys that were not already contributed
    // by the partition pass.
    for e in order_by {
        if !final_sort_keys.contains(e) {
            final_sort_keys.push(e.clone());
            is_partition_flag.push(false);
        }
    }

    let res = final_sort_keys
        .into_iter()
        .zip(is_partition_flag)
        .collect::<Vec<_>>();
    Ok(res)
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]