rgehan commented on code in PR #18352:
URL: https://github.com/apache/datafusion/pull/18352#discussion_r2478746309


##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -2996,6 +2997,152 @@ async fn test_count_wildcard_on_window() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn 
union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false()
 -> Result<()> {
+    assert_snapshot!(
+        
union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(false).await?,
+        @r#"
+    
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+    | plan_type     | plan                                                     
                                                                                
                                                                         |
+    
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+    | logical_plan  | Aggregate: groupBy=[[id]], aggr=[[]]                     
                                                                                
                                                                         |
+    |               |   Union                                                  
                                                                                
                                                                         |
+    |               |     TableScan: sorted projection=[id]                    
                                                                                
                                                                         |
+    |               |     Sort: unsorted.id ASC NULLS LAST                     
                                                                                
                                                                         |
+    |               |       TableScan: unsorted projection=[id]                
                                                                                
                                                                         |
+    | physical_plan | AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], 
ordering_mode=Sorted                                                            
                                                                            |
+    |               |   SortExec: expr=[id@0 ASC NULLS LAST], 
preserve_partitioning=[false]                                                   
                                                                                
          |
+    |               |     CoalescePartitionsExec                               
                                                                                
                                                                         |
+    |               |       AggregateExec: mode=Partial, gby=[id@0 as id], 
aggr=[]                                                                         
                                                                             |
+    |               |         UnionExec                                        
                                                                                
                                                                         |
+    |               |           DataSourceExec: file_groups={1 group: 
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], 
output_ordering=[id@0 ASC NULLS LAST], file_type=parquet |
+    |               |           DataSourceExec: file_groups={1 group: 
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet 
                                       |
+    |               |                                                          
                                                                                
                                                                         |
+    
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+    "#);
+    Ok(())
+}
+
+#[ignore] // See https://github.com/apache/datafusion/issues/18380
+#[tokio::test]
+async fn 
union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true()
 -> Result<()> {
+    assert_snapshot!(
+        
union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(true).await?,
+        @r#"
+    
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+    | plan_type     | plan                                                     
                                                                                
                                                                       |
+    
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+    | logical_plan  | Aggregate: groupBy=[[id]], aggr=[[]]                     
                                                                                
                                                                       |
+    |               |   Union                                                  
                                                                                
                                                                       |
+    |               |     TableScan: sorted projection=[id]                    
                                                                                
                                                                       |
+    |               |     Sort: unsorted.id ASC NULLS LAST                     
                                                                                
                                                                       |
+    |               |       TableScan: unsorted projection=[id]                
                                                                                
                                                                       |
+    | physical_plan | AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], 
ordering_mode=Sorted                                                            
                                                                          |
+    |               |   SortPreservingMergeExec: [id@0 ASC NULLS LAST]         
                                                                                
                                                                       |
+    |               |     AggregateExec: mode=Partial, gby=[id@0 as id], 
aggr=[], ordering_mode=Sorted                                                   
                                                                             |
+    |               |       UnionExec                                          
                                                                                
                                                                       |
+    |               |         DataSourceExec: file_groups={1 group: 
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], 
output_ordering=[id@0 ASC NULLS LAST], file_type=parquet |
+    |               |         SortExec: expr=[id@0 ASC NULLS LAST], 
preserve_partitioning=[false]                                                   
                                                                                
  |
+    |               |           DataSourceExec: file_groups={1 group: 
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet 
                                     |
+    |               |                                                          
                                                                                
                                                                       |
+    
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+    "#);
+
+    // 💥 Doesn't pass, and generates this plan:
+    //
+    // AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], 
ordering_mode=Sorted
+    //   SortPreservingMergeExec: [id@0 ASC NULLS LAST]
+    //     SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true]
+    //       AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[]
+    //         UnionExec
+    //           DataSourceExec: file_groups={1 group: 
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], 
output_ordering=[id@0 ASC NULLS LAST], file_type=parquet
+    //           DataSourceExec: file_groups={1 group: 
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet
+    //
+    //
+    // === Excerpt from the verbose explain ===
+    //
+    // Physical_plan after EnforceDistribution:
+    //
+    // OutputRequirementExec: order_by=[], dist_by=Unspecified
+    //   AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], 
ordering_mode=Sorted
+    //     SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false]
+    //       CoalescePartitionsExec
+    //         AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[], 
ordering_mode=Sorted
+    //           UnionExec
+    //             DataSourceExec: file_groups={1 group: 
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], 
output_ordering=[id@0 ASC NULLS LAST], file_type=parquet
+    //             SortExec: expr=[id@0 ASC NULLS LAST], 
preserve_partitioning=[false]
+    //               DataSourceExec: file_groups={1 group: 
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet
+    //
+    // Physical_plan after EnforceSorting:

Review Comment:
   No problem, I've reverted to the excerpt, but with the intermediate optimizer 
step 👍 (it's `CombinePartialFinalAggregate`)
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to