Re: [PR] Minor: Improve `arrow_statistics` tests [datafusion]

via GitHub Sat, 15 Jun 2024 09:24:11 -0700


alamb commented on code in PR #10927:
URL: https://github.com/apache/datafusion/pull/10927#discussion_r1641278953



##########
datafusion/core/tests/parquet/arrow_statistics.rs:
##########
@@ -164,6 +164,36 @@ impl TestReader {
     }
 }
 
+/// Which statistics should we check?

Review Comment:
   I think this is easier to understand (and now it is at least documented why 
sometimes the page stats are the same and some they are different)



##########
datafusion/core/tests/parquet/arrow_statistics.rs:
##########
@@ -482,22 +513,20 @@ async fn test_int_64() {
     .await;
 
     // since each row has only one data page, the statistics are the same
-    for test_data_page_statistics in [true, false] {
-        Test {
-            reader: &reader,
-            // mins are [-5, -4, 0, 5]
-            expected_min: Arc::new(Int64Array::from(vec![-5, -4, 0, 5])),
-            // maxes are [-1, 0, 4, 9]
-            expected_max: Arc::new(Int64Array::from(vec![-1, 0, 4, 9])),
-            // nulls are [0, 0, 0, 0]
-            expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
-            // row counts are [5, 5, 5, 5]
-            expected_row_counts: UInt64Array::from(vec![5, 5, 5, 5]),
-            column_name: "i64",
-            test_data_page_statistics,
-        }
-        .run();
+    Test {
+        reader: &reader,
+        // mins are [-5, -4, 0, 5]
+        expected_min: Arc::new(Int64Array::from(vec![-5, -4, 0, 5])),
+        // maxes are [-1, 0, 4, 9]
+        expected_max: Arc::new(Int64Array::from(vec![-1, 0, 4, 9])),
+        // nulls are [0, 0, 0, 0]
+        expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
+        // row counts are [5, 5, 5, 5]
+        expected_row_counts: UInt64Array::from(vec![5, 5, 5, 5]),
+        column_name: "i64",
+        check: Check::Both,

Review Comment:
   I thought `Both` was easier to understand here than the loop -- and will be 
a better pattern to follow to add supprot for other types



##########
datafusion/core/tests/parquet/arrow_statistics.rs:
##########
@@ -259,13 +288,16 @@ impl<'a> Test<'a> {
             let row_counts = converter
                 .data_page_row_counts(column_offset_index, row_groups, 
&row_group_indices)
                 .unwrap();
-            let expected_row_counts = Arc::new(expected_row_counts) as 
ArrayRef;
+            // https://github.com/apache/datafusion/issues/10926
+            let expected_row_counts: ArrayRef = 
Arc::new(expected_row_counts.clone());
             assert_eq!(
                 &row_counts, &expected_row_counts,
                 "{column_name}: Mismatch with expected row counts. \
                 Actual: {row_counts:?}. Expected: {expected_row_counts:?}"
             );
-        } else {
+        }

Review Comment:
   The code can now check either or both types of statistics



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Minor: Improve `arrow_statistics` tests [datafusion]

Reply via email to