rgehan commented on code in PR #18352:
URL: https://github.com/apache/datafusion/pull/18352#discussion_r2478746309
##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -2996,6 +2997,152 @@ async fn test_count_wildcard_on_window() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn
union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false()
-> Result<()> {
+ assert_snapshot!(
+
union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(false).await?,
+ @r#"
+
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | plan_type | plan
|
+
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | logical_plan | Aggregate: groupBy=[[id]], aggr=[[]]
|
+ | | Union
|
+ | | TableScan: sorted projection=[id]
|
+ | | Sort: unsorted.id ASC NULLS LAST
|
+ | | TableScan: unsorted projection=[id]
|
+ | physical_plan | AggregateExec: mode=Final, gby=[id@0 as id], aggr=[],
ordering_mode=Sorted
|
+ | | SortExec: expr=[id@0 ASC NULLS LAST],
preserve_partitioning=[false]
|
+ | | CoalescePartitionsExec
|
+ | | AggregateExec: mode=Partial, gby=[id@0 as id],
aggr=[]
|
+ | | UnionExec
|
+ | | DataSourceExec: file_groups={1 group:
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id],
output_ordering=[id@0 ASC NULLS LAST], file_type=parquet |
+ | | DataSourceExec: file_groups={1 group:
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet
|
+ | |
|
+
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ "#);
+ Ok(())
+}
+
+#[ignore] // See https://github.com/apache/datafusion/issues/18380
+#[tokio::test]
+async fn
union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true()
-> Result<()> {
+ assert_snapshot!(
+
union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(true).await?,
+ @r#"
+
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | plan_type | plan
|
+
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ | logical_plan | Aggregate: groupBy=[[id]], aggr=[[]]
|
+ | | Union
|
+ | | TableScan: sorted projection=[id]
|
+ | | Sort: unsorted.id ASC NULLS LAST
|
+ | | TableScan: unsorted projection=[id]
|
+ | physical_plan | AggregateExec: mode=Final, gby=[id@0 as id], aggr=[],
ordering_mode=Sorted
|
+ | | SortPreservingMergeExec: [id@0 ASC NULLS LAST]
|
+ | | AggregateExec: mode=Partial, gby=[id@0 as id],
aggr=[], ordering_mode=Sorted
|
+ | | UnionExec
|
+ | | DataSourceExec: file_groups={1 group:
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id],
output_ordering=[id@0 ASC NULLS LAST], file_type=parquet |
+ | | SortExec: expr=[id@0 ASC NULLS LAST],
preserve_partitioning=[false]
|
+ | | DataSourceExec: file_groups={1 group:
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet
|
+ | |
|
+
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+ "#);
+
+ // 💥 Doesn't pass, and generates this plan:
+ //
+ // AggregateExec: mode=Final, gby=[id@0 as id], aggr=[],
ordering_mode=Sorted
+ // SortPreservingMergeExec: [id@0 ASC NULLS LAST]
+ // SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true]
+ // AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[]
+ // UnionExec
+ // DataSourceExec: file_groups={1 group:
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id],
output_ordering=[id@0 ASC NULLS LAST], file_type=parquet
+ // DataSourceExec: file_groups={1 group:
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet
+ //
+ //
+ // === Excerpt from the verbose explain ===
+ //
+ // Physical_plan after EnforceDistribution:
+ //
+ // OutputRequirementExec: order_by=[], dist_by=Unspecified
+ // AggregateExec: mode=Final, gby=[id@0 as id], aggr=[],
ordering_mode=Sorted
+ // SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false]
+ // CoalescePartitionsExec
+ // AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[],
ordering_mode=Sorted
+ // UnionExec
+ // DataSourceExec: file_groups={1 group:
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id],
output_ordering=[id@0 ASC NULLS LAST], file_type=parquet
+ // SortExec: expr=[id@0 ASC NULLS LAST],
preserve_partitioning=[false]
+ // DataSourceExec: file_groups={1 group:
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet
+ //
+ // Physical_plan after EnforceSorting:
Review Comment:
No problem, I've reverted to the excerpt, but with the intermediate optimizer
step 👍 (it's `CombinePartialFinalAggregate`)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]