avantgardnerio commented on code in PR #7192:
URL: https://github.com/apache/arrow-datafusion/pull/7192#discussion_r1301882495
##########
datafusion/core/tests/sql/select.rs:
##########
@@ -572,6 +574,79 @@ async fn parallel_query_with_filter() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn parallel_query_with_limit() -> Result<()> {
+ let tmp_dir = TempDir::new()?;
+ let partition_count = 4;
+ let ctx = partitioned_csv::create_ctx(&tmp_dir, partition_count).await?;
+
+ let dataframe = ctx
+ .sql("SELECT c3, max(c2) as max FROM test group by c3 order by max
desc limit 2")
+ .await?;
+
+ let actual_logical_plan = format!("{:?}", dataframe.logical_plan());
+ let expected_logical_plan = r#"
+Limit: skip=0, fetch=2
+ Sort: max DESC NULLS FIRST
+ Projection: test.c3, MAX(test.c2) AS max
+ Aggregate: groupBy=[[test.c3]], aggr=[[MAX(test.c2)]]
+ TableScan: test
+ "#
+ .trim();
+ assert_eq!(expected_logical_plan, actual_logical_plan);
+
+ let physical_plan = dataframe.create_physical_plan().await?;
+
+ // TODO: find the GroupedHashAggregateStream node and see if we can assert
bucket count
Review Comment:
Actually, the whole test is pointless now - it dates from when I started work
on this story thinking it had something to do with partitioning. In reality it
is partition-agnostic (though it helps a lot when there are per-partition
aggregates!).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]