jackwener commented on code in PR #4643:
URL: https://github.com/apache/arrow-datafusion/pull/4643#discussion_r1050815168
##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -249,16 +250,29 @@ impl LogicalPlanBuilder {
) -> Result<LogicalPlan> {
let mut plan = input;
let mut groups = group_window_expr_by_sort_keys(&window_exprs)?;
- // sort by sort_key len descending, so that more deeply sorted plans
gets nested further
- // down as children; to further mimic the behavior of PostgreSQL, we
want stable sort
- // and a reverse so that tieing sort keys are reversed in order; note
that by this rule
- // if there's an empty over, it'll be at the top level
- groups.sort_by(|(key_a, _), (key_b, _)| key_a.len().cmp(&key_b.len()));
- groups.reverse();
+ // To align with the behavior of PostgreSQL, we want the sort_keys
sorted as same rule as PostgreSQL that first
+ // we compare the sort key themselves and if one window's sort keys
are a prefix of another
+ // put the window with more sort keys first. so more deeply sorted
plans gets nested further down as children.
+ // The sort_by() implementation here is a stable sort.
+ // Note that by this rule if there's an empty over, it'll be at the
top level
Review Comment:
👍
##########
datafusion/core/tests/sql/select.rs:
##########
@@ -835,6 +835,88 @@ async fn query_on_string_dictionary() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn sort_on_window_null_string() -> Result<()> {
+ let d1: DictionaryArray<Int32Type> =
+ vec![Some("one"), None, Some("three")].into_iter().collect();
+ let d2: StringArray = vec![Some("ONE"), None,
Some("THREE")].into_iter().collect();
+ let d3: LargeStringArray =
+ vec![Some("One"), None, Some("Three")].into_iter().collect();
+
+ let batch = RecordBatch::try_from_iter(vec![
+ ("d1", Arc::new(d1) as ArrayRef),
+ ("d2", Arc::new(d2) as ArrayRef),
+ ("d3", Arc::new(d3) as ArrayRef),
+ ])
+ .unwrap();
+
+ let ctx =
SessionContext::with_config(SessionConfig::new().with_target_partitions(2));
+ ctx.register_batch("test", batch)?;
+
+ let sql =
+ "SELECT d1, row_number() OVER (partition by d1) as rn1 FROM test order
by d1 asc";
+
+ let actual = execute_to_batches(&ctx, sql).await;
+ // NULLS LAST
+ let expected = vec![
+ "+-------+-----+",
+ "| d1 | rn1 |",
+ "+-------+-----+",
+ "| one | 1 |",
+ "| three | 1 |",
+ "| | 1 |",
+ "+-------+-----+",
+ ];
+ assert_batches_eq!(expected, &actual);
+
+ let sql = "SELECT d2, row_number() OVER (partition by d2) as rn1 FROM
test";
+ let actual = execute_to_batches(&ctx, sql).await;
+ // NULLS LAST
+ let expected = vec![
+ "+-------+-----+",
+ "| d2 | rn1 |",
+ "+-------+-----+",
+ "| ONE | 1 |",
+ "| THREE | 1 |",
+ "| | 1 |",
+ "+-------+-----+",
+ ];
+ assert_batches_eq!(expected, &actual);
+
+ let sql =
+ "SELECT d2, row_number() OVER (partition by d2 order by d2 desc) as
rn1 FROM test";
+
+ let actual = execute_to_batches(&ctx, sql).await;
+ // NULLS FIRST
+ let expected = vec![
+ "+-------+-----+",
+ "| d2 | rn1 |",
+ "+-------+-----+",
+ "| | 1 |",
+ "| THREE | 1 |",
+ "| ONE | 1 |",
+ "+-------+-----+",
+ ];
+ assert_batches_eq!(expected, &actual);
+
+ // FIXME sort on LargeUtf8 String has bug.
+ // let sql =
+ // "SELECT d3, row_number() OVER (partition by d3) as rn1 FROM test";
+ // let actual = execute_to_batches(&ctx, sql).await;
+ // let expected = vec![
+ // "+-------+-----+",
+ // "| d3 | rn1 |",
+ // "+-------+-----+",
+ // "| | 1 |",
+ // "| One | 1 |",
+ // "| Three | 1 |",
+ // "+-------+-----+",
+ // ];
+ // assert_batches_eq!(expected, &actual);
Review Comment:
👍
##########
datafusion/core/tests/sql/select.rs:
##########
@@ -835,6 +835,88 @@ async fn query_on_string_dictionary() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn sort_on_window_null_string() -> Result<()> {
+ let d1: DictionaryArray<Int32Type> =
+ vec![Some("one"), None, Some("three")].into_iter().collect();
+ let d2: StringArray = vec![Some("ONE"), None,
Some("THREE")].into_iter().collect();
+ let d3: LargeStringArray =
+ vec![Some("One"), None, Some("Three")].into_iter().collect();
+
+ let batch = RecordBatch::try_from_iter(vec![
+ ("d1", Arc::new(d1) as ArrayRef),
+ ("d2", Arc::new(d2) as ArrayRef),
+ ("d3", Arc::new(d3) as ArrayRef),
+ ])
+ .unwrap();
+
+ let ctx =
SessionContext::with_config(SessionConfig::new().with_target_partitions(2));
+ ctx.register_batch("test", batch)?;
+
+ let sql =
+ "SELECT d1, row_number() OVER (partition by d1) as rn1 FROM test order
by d1 asc";
+
+ let actual = execute_to_batches(&ctx, sql).await;
+ // NULLS LAST
+ let expected = vec![
+ "+-------+-----+",
+ "| d1 | rn1 |",
+ "+-------+-----+",
+ "| one | 1 |",
+ "| three | 1 |",
+ "| | 1 |",
+ "+-------+-----+",
+ ];
+ assert_batches_eq!(expected, &actual);
+
+ let sql = "SELECT d2, row_number() OVER (partition by d2) as rn1 FROM
test";
+ let actual = execute_to_batches(&ctx, sql).await;
+ // NULLS LAST
+ let expected = vec![
+ "+-------+-----+",
+ "| d2 | rn1 |",
+ "+-------+-----+",
+ "| ONE | 1 |",
+ "| THREE | 1 |",
+ "| | 1 |",
+ "+-------+-----+",
+ ];
+ assert_batches_eq!(expected, &actual);
+
+ let sql =
+ "SELECT d2, row_number() OVER (partition by d2 order by d2 desc) as
rn1 FROM test";
+
+ let actual = execute_to_batches(&ctx, sql).await;
+ // NULLS FIRST
+ let expected = vec![
+ "+-------+-----+",
+ "| d2 | rn1 |",
+ "+-------+-----+",
+ "| | 1 |",
+ "| THREE | 1 |",
+ "| ONE | 1 |",
+ "+-------+-----+",
+ ];
+ assert_batches_eq!(expected, &actual);
+
+ // FIXME sort on LargeUtf8 String has bug.
+ // let sql =
+ // "SELECT d3, row_number() OVER (partition by d3) as rn1 FROM test";
+ // let actual = execute_to_batches(&ctx, sql).await;
+ // let expected = vec![
+ // "+-------+-----+",
+ // "| d3 | rn1 |",
+ // "+-------+-----+",
+ // "| | 1 |",
+ // "| One | 1 |",
+ // "| Three | 1 |",
+ // "+-------+-----+",
+ // ];
+ // assert_batches_eq!(expected, &actual);
Review Comment:
BTW, we can add some test in `sqllogicaltest`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]