jiangzhx commented on code in PR #5485:
URL: https://github.com/apache/arrow-datafusion/pull/5485#discussion_r1128978336


##########
datafusion/core/tests/sql/aggregates.rs:
##########
@@ -99,11 +99,11 @@ async fn aggregate_timestamps_count() -> Result<()> {
     .await;
 
     let expected = vec![
-        "+--------------+---------------+---------------+-------------+",
-        "| COUNT(nanos) | COUNT(micros) | COUNT(millis) | COUNT(secs) |",
-        "+--------------+---------------+---------------+-------------+",
-        "| 3            | 3             | 3             | 3           |",
-        "+--------------+---------------+---------------+-------------+",
+        
"+----------------+-----------------+-----------------+---------------+",
+        "| COUNT(t.nanos) | COUNT(t.micros) | COUNT(t.millis) | COUNT(t.secs) 
|",
+        
"+----------------+-----------------+-----------------+---------------+",
+        "| 3              | 3               | 3               | 3             
|",
+        
"+----------------+-----------------+-----------------+---------------+",

Review Comment:
   yes,i agree with you ,  regular counts output COUNT(nanos) will feel more 
intuitive.
   
   but in the current situation, we need to ensure the output remains 
consistent, regardless of whether the AggregateStatistics optimizer is used or 
not.
   
   at main branch, disable aggregate_statistics or enable will retrun 
difference resutl.
   ```
   #[tokio::test]
   async fn count_test() -> Result<()> {
       let mut ctx = SessionContext::new();
       let disable_aggregate_statistics = true;
   
       //disable or not,this test case should be passed
       if (disable_aggregate_statistics) {
           let with_out_aggregate_statistics = ctx
               .state()
               .physical_optimizers()
               .iter()
               .filter(|optimizer| 
optimizer.as_ref().name().ne("aggregate_statistics"))
               .map(|optimizer| optimizer.clone())
               .collect();
           let state = ctx
               .state()
               .with_physical_optimizer_rules(with_out_aggregate_statistics);
           ctx = SessionContext::with_state(state);
       }
   
       let testdata = datafusion::test_util::parquet_test_data();
       ctx.register_parquet(
           "alltypes_tiny_pages",
           &format!("{testdata}/alltypes_tiny_pages.parquet"),
           ParquetReadOptions::default(),
       )
       .await?;
       let results = ctx
           .sql("SELECT count(id), max(id), min(id) FROM alltypes_tiny_pages")
           .await?
           .collect()
           .await?;
       let expected = vec![
           
"+-------------------------------+-----------------------------+-----------------------------+",
           "| COUNT(alltypes_tiny_pages.id) | MAX(alltypes_tiny_pages.id) | 
MIN(alltypes_tiny_pages.id) |",
           
"+-------------------------------+-----------------------------+-----------------------------+",
           "| 7300                          | 7299                        | 0   
                        |",
           
"+-------------------------------+-----------------------------+-----------------------------+",
       ];
       assert_batches_sorted_eq!(expected, &results);
   
       Ok(())
   }
   
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to