(arrow-rs) branch main updated: feat: add `row_group_is_[max/min]_value_exact` to StatisticsConverter (#7574)

alamb Fri, 06 Jun 2025 13:30:51 -0700

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git



The following commit(s) were added to refs/heads/main by this push:
     new 9e575bd898 feat: add `row_group_is_[max/min]_value_exact` to 
StatisticsConverter (#7574)
9e575bd898 is described below

commit 9e575bd8986202e1bd53d6c5dd0512de7c93a94b
Author: Yuhan Wang <[email protected]>
AuthorDate: Sat Jun 7 04:30:37 2025 +0800

    feat: add `row_group_is_[max/min]_value_exact` to StatisticsConverter 
(#7574)
    
    # Which issue does this PR close?
    
    <!--
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax. For example
    `Closes #123` indicates that this PR will close issue #123.
    -->
    
    # Rationale for this change
    
    <!--
    Why are you proposing this change? If this is already explained clearly
    in the issue then this section is not needed.
    Explaining clearly why changes are proposed helps reviewers understand
    your changes and offer better suggestions for fixes.
    -->
    
    As described in
    https://github.com/apache/datafusion/issues/15976#issuecomment-2920132245,
    we can expose the `is_[max/min]_value_exact` flags in
    `StatisticsConverter` so that callers can determine whether the stats are exact.
    
    # What changes are included in this PR?
    
    <!--
    There is no need to duplicate the description in the issue here but it
    is sometimes worth providing a summary of the individual changes in this
    PR.
    -->
    
    Add `row_group_is_[max/min]_value_exact` to `StatisticsConverter`, and
    update the corresponding test files to exercise the new methods.
    
    # Are there any user-facing changes?
    
    <!--
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    -->
    
    <!---
    If there are any breaking changes to public APIs, please call them out.
    -->
---
 parquet/src/arrow/arrow_reader/statistics.rs |  42 ++++
 parquet/tests/arrow_reader/mod.rs            |  53 ++++-
 parquet/tests/arrow_reader/statistics.rs     | 293 ++++++++++++++++++++++++++-
 3 files changed, 383 insertions(+), 5 deletions(-)

diff --git a/parquet/src/arrow/arrow_reader/statistics.rs 
b/parquet/src/arrow/arrow_reader/statistics.rs
index 09f8ec7cc2..cffa60e62e 100644
--- a/parquet/src/arrow/arrow_reader/statistics.rs
+++ b/parquet/src/arrow/arrow_reader/statistics.rs
@@ -1403,6 +1403,48 @@ impl<'a> StatisticsConverter<'a> {
         max_statistics(data_type, iter, self.physical_type)
     }
 
+    /// Extract the `is_max_value_exact` flags from row group statistics in 
[`RowGroupMetaData`]
+    ///
+    /// See docs on [`Self::row_group_maxes`] for details
+    pub fn row_group_is_max_value_exact<I>(&self, metadatas: I) -> 
Result<BooleanArray>
+    where
+        I: IntoIterator<Item = &'a RowGroupMetaData>,
+    {
+        let Some(parquet_index) = self.parquet_column_index else {
+            let num_row_groups = metadatas.into_iter().count();
+            return Ok(BooleanArray::from_iter(
+                std::iter::repeat(None).take(num_row_groups),
+            ));
+        };
+
+        let is_max_value_exact = metadatas
+            .into_iter()
+            .map(|x| x.column(parquet_index).statistics())
+            .map(|s| s.map(|s| s.max_is_exact()));
+        Ok(BooleanArray::from_iter(is_max_value_exact))
+    }
+
+    /// Extract the `is_min_value_exact` flags from row group statistics in 
[`RowGroupMetaData`]
+    ///
+    /// See docs on [`Self::row_group_mins`] for details
+    pub fn row_group_is_min_value_exact<I>(&self, metadatas: I) -> 
Result<BooleanArray>
+    where
+        I: IntoIterator<Item = &'a RowGroupMetaData>,
+    {
+        let Some(parquet_index) = self.parquet_column_index else {
+            let num_row_groups = metadatas.into_iter().count();
+            return Ok(BooleanArray::from_iter(
+                std::iter::repeat(None).take(num_row_groups),
+            ));
+        };
+
+        let is_min_value_exact = metadatas
+            .into_iter()
+            .map(|x| x.column(parquet_index).statistics())
+            .map(|s| s.map(|s| s.min_is_exact()));
+        Ok(BooleanArray::from_iter(is_min_value_exact))
+    }
+
     /// Extract the null counts from row group statistics in 
[`RowGroupMetaData`]
     ///
     /// See docs on [`Self::row_group_mins`] for details
diff --git a/parquet/tests/arrow_reader/mod.rs 
b/parquet/tests/arrow_reader/mod.rs
index 0e6783583c..21aa1c3f26 100644
--- a/parquet/tests/arrow_reader/mod.rs
+++ b/parquet/tests/arrow_reader/mod.rs
@@ -31,7 +31,9 @@ use chrono::Datelike;
 use chrono::{Duration, TimeDelta};
 use half::f16;
 use parquet::arrow::ArrowWriter;
-use parquet::file::properties::{EnabledStatistics, WriterProperties};
+use parquet::file::properties::{
+    EnabledStatistics, WriterProperties, DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH,
+};
 use std::sync::Arc;
 use tempfile::NamedTempFile;
 
@@ -91,10 +93,20 @@ enum Scenario {
     PeriodsInColumnNames,
     StructArray,
     UTF8,
+    /// UTF8 with max and min values truncated
+    TruncatedUTF8,
     UTF8View,
     BinaryView,
 }
 
+impl Scenario {
+    // Returns true if the test scenario needs `set_statistics_truncate_length`
+    // to be set in order to test statistics truncation.
+    fn truncate_stats(&self) -> bool {
+        matches!(self, Scenario::TruncatedUTF8)
+    }
+}
+
 fn make_boolean_batch(v: Vec<Option<bool>>) -> RecordBatch {
     let schema = Arc::new(Schema::new(vec![Field::new(
         "bool",
@@ -631,6 +643,8 @@ fn make_dict_batch() -> RecordBatch {
     .unwrap()
 }
 
+/// Create data batches for the given scenario.
+/// `make_test_file_rg` uses the first batch to infer the schema of the file.
 fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> {
     match scenario {
         Scenario::Boolean => {
@@ -987,6 +1001,33 @@ fn create_data_batch(scenario: Scenario) -> 
Vec<RecordBatch> {
                 make_utf8_batch(vec![Some("e"), Some("f"), Some("g"), 
Some("h"), Some("i")]),
             ]
         }
+        Scenario::TruncatedUTF8 => {
+            // Make utf8 batch with strings longer than 64 bytes
+            // to check truncation of row group statistics
+            vec![
+                make_utf8_batch(vec![
+                    Some(&("a".repeat(64) + "1")),
+                    Some(&("b".repeat(64) + "2")),
+                    Some(&("c".repeat(64) + "3")),
+                    None,
+                    Some(&("d".repeat(64) + "4")),
+                ]),
+                make_utf8_batch(vec![
+                    Some(&("e".repeat(64) + "5")),
+                    Some(&("f".repeat(64) + "6")),
+                    Some(&("g".repeat(64) + "7")),
+                    Some(&("h".repeat(64) + "8")),
+                    Some(&("i".repeat(64) + "9")),
+                ]),
+                make_utf8_batch(vec![
+                    Some("j"),
+                    Some("k"),
+                    Some(&("l".repeat(64) + "12")),
+                    Some(&("m".repeat(64) + "13")),
+                    Some(&("n".repeat(64) + "14")),
+                ]),
+            ]
+        }
         Scenario::UTF8View => {
             // Make utf8_view batch including string length <12 and >12 bytes
             // as the internal representation of StringView is differed for 
strings
@@ -1027,11 +1068,15 @@ async fn make_test_file_rg(scenario: Scenario, 
row_per_group: usize) -> NamedTem
         .tempfile()
         .expect("tempfile creation");
 
-    let props = WriterProperties::builder()
+    let mut builder = WriterProperties::builder()
         .set_max_row_group_size(row_per_group)
         .set_bloom_filter_enabled(true)
-        .set_statistics_enabled(EnabledStatistics::Page)
-        .build();
+        .set_statistics_enabled(EnabledStatistics::Page);
+    if scenario.truncate_stats() {
+        // Use the same value as the default `column_index_truncate_length` to check both stats with one value
+        builder = 
builder.set_statistics_truncate_length(DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH);
+    }
+    let props = builder.build();
 
     let batches = create_data_batch(scenario);
     let schema = batches[0].schema();
diff --git a/parquet/tests/arrow_reader/statistics.rs 
b/parquet/tests/arrow_reader/statistics.rs
index 0eb0fc2b27..7a389fb5eb 100644
--- a/parquet/tests/arrow_reader/statistics.rs
+++ b/parquet/tests/arrow_reader/statistics.rs
@@ -212,6 +212,8 @@ struct Test<'a> {
     expected_max: ArrayRef,
     expected_null_counts: UInt64Array,
     expected_row_counts: Option<UInt64Array>,
+    expected_max_value_exact: BooleanArray,
+    expected_min_value_exact: BooleanArray,
     /// Which column to extract statistics from
     column_name: &'static str,
     /// What statistics should be checked?
@@ -245,6 +247,8 @@ impl Test<'_> {
             expected_max,
             expected_null_counts,
             expected_row_counts,
+            expected_max_value_exact: expected_max_exact,
+            expected_min_value_exact: expected_min_exact,
             column_name,
             check,
         } = self;
@@ -328,6 +332,24 @@ impl Test<'_> {
                 "{column_name}: Mismatch with expected row counts. \
                 Actual: {row_counts:?}. Expected: {expected_row_counts:?}"
             );
+
+            let is_max_value_exact = converter
+                
.row_group_is_max_value_exact(reader.metadata().row_groups().iter())
+                .unwrap();
+            assert_eq!(
+                is_max_value_exact, expected_max_exact,
+                "{column_name}: Mismatch with expected max value exactness. \
+                Actual: {is_max_value_exact:?}. Expected: 
{expected_max_exact:?}"
+            );
+
+            let is_min_value_exact = converter
+                
.row_group_is_min_value_exact(reader.metadata().row_groups().iter())
+                .unwrap();
+            assert_eq!(
+                is_min_value_exact, expected_min_exact,
+                "{column_name}: Mismatch with expected min value exactness. \
+                Actual: {is_min_value_exact:?}. Expected: 
{expected_min_exact:?}"
+            );
         }
     }
 
@@ -354,7 +376,49 @@ impl Test<'_> {
 //
 // Remaining cases
 //   f64::NAN
-// - Using truncated statistics  ("exact min value" and "exact max value" 
https://docs.rs/parquet/latest/parquet/file/statistics/enum.Statistics.html#method.max_is_exact)
+
+#[tokio::test]
+async fn test_max_and_min_value_truncated() {
+    let reader = TestReader {
+        scenario: Scenario::TruncatedUTF8,
+        row_per_group: 5,
+    }
+    .build()
+    .await;
+
+    Test {
+        reader: &reader,
+        // min is truncated to
+        // 1. `"a".repeat(64)`, original value is `"a".repeat(64) + "1"`
+        // 2. `"e".repeat(64)`, original value is `"e".repeat(64) + "5"`
+        // 3. "j", as expected with no truncation
+        expected_min: Arc::new(StringArray::from(vec![
+            &("a".repeat(64)),
+            &("e".repeat(64)),
+            "j",
+        ])),
+        // max is truncated to
+        // 1. `"d".repeat(63) + "e"`, original value is `"d".repeat(64) + "4"`
+        // 2. `"i".repeat(63) + "j"`, original value is `"i".repeat(64) + "9"`
+        // 3. `"n".repeat(63) + "o"`, original value is `"n".repeat(64) + "14"`
+        expected_max: Arc::new(StringArray::from(vec![
+            "d".repeat(63) + "e",
+            "i".repeat(63) + "j",
+            "n".repeat(63) + "o",
+        ])),
+        // one null in the first row group, none in the others
+        expected_null_counts: UInt64Array::from(vec![1, 0, 0]),
+        // 3 row groups of 5 rows each
+        expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // all max values are truncated
+        expected_max_value_exact: BooleanArray::from(vec![false, false, 
false]),
+        // min values are truncated in the first two row groups
+        expected_min_value_exact: BooleanArray::from(vec![false, false, true]),
+        column_name: "utf8",
+        check: Check::Both,
+    }
+    .run()
+}
 
 #[tokio::test]
 async fn test_one_row_group_without_null() {
@@ -377,6 +441,9 @@ async fn test_one_row_group_without_null() {
         expected_null_counts: UInt64Array::from(vec![0]),
         // 3 rows
         expected_row_counts: Some(UInt64Array::from(vec![3])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true]),
+        expected_min_value_exact: BooleanArray::from(vec![true]),
         column_name: "i64",
         check: Check::Both,
     }
@@ -404,6 +471,9 @@ async fn test_one_row_group_with_null_and_negative() {
         expected_null_counts: UInt64Array::from(vec![2]),
         // 8 rows
         expected_row_counts: Some(UInt64Array::from(vec![8])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true]),
+        expected_min_value_exact: BooleanArray::from(vec![true]),
         column_name: "i64",
         check: Check::Both,
     }
@@ -431,6 +501,9 @@ async fn test_two_row_group_with_null() {
         expected_null_counts: UInt64Array::from(vec![0, 2]),
         // row counts are [10, 5]
         expected_row_counts: Some(UInt64Array::from(vec![10, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "i64",
         check: Check::Both,
     }
@@ -458,6 +531,8 @@ async fn test_two_row_groups_with_all_nulls_in_one() {
         expected_null_counts: UInt64Array::from(vec![1, 3]),
         // row counts are [5, 3]
         expected_row_counts: Some(UInt64Array::from(vec![5, 3])),
+        expected_max_value_exact: BooleanArray::from(vec![true, false]),
+        expected_min_value_exact: BooleanArray::from(vec![true, false]),
         column_name: "i64",
         check: Check::Both,
     }
@@ -489,6 +564,8 @@ async fn test_multiple_data_pages_nulls_and_negatives() {
         expected_max: Arc::new(Int64Array::from(vec![Some(2), Some(6), 
Some(9), None])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 1, 2]),
         expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 2])),
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
false]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
false]),
         column_name: "i64",
         check: Check::DataPage,
     }
@@ -551,6 +628,8 @@ async fn test_data_page_stats_with_all_null_page() {
             expected_max: new_null_array(expected_data_type, 1),
             expected_null_counts: UInt64Array::from(vec![4]),
             expected_row_counts: Some(UInt64Array::from(vec![4])),
+            expected_max_value_exact: BooleanArray::from(vec![false]),
+            expected_min_value_exact: BooleanArray::from(vec![false]),
             column_name: "col",
             check: Check::DataPage,
         }
@@ -585,6 +664,9 @@ async fn test_int_64() {
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "i64",
         check: Check::Both,
     }
@@ -611,6 +693,9 @@ async fn test_int_32() {
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "i32",
         check: Check::Both,
     }
@@ -637,6 +722,9 @@ async fn test_int_16() {
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "i16",
         check: Check::Both,
     }
@@ -663,6 +751,9 @@ async fn test_int_8() {
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "i8",
         check: Check::Both,
     }
@@ -699,6 +790,9 @@ async fn test_float_16() {
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "f",
         check: Check::Both,
     }
@@ -725,6 +819,9 @@ async fn test_float_32() {
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "f",
         check: Check::Both,
     }
@@ -751,6 +848,9 @@ async fn test_float_64() {
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "f",
         check: Check::Both,
     }
@@ -801,6 +901,9 @@ async fn test_timestamp() {
         expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "nanos",
         check: Check::Both,
     }
@@ -830,6 +933,9 @@ async fn test_timestamp() {
         expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "nanos_timezoned",
         check: Check::Both,
     }
@@ -852,6 +958,9 @@ async fn test_timestamp() {
         ])),
         expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "micros",
         check: Check::Both,
     }
@@ -881,6 +990,9 @@ async fn test_timestamp() {
         expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "micros_timezoned",
         check: Check::Both,
     }
@@ -903,6 +1015,9 @@ async fn test_timestamp() {
         ])),
         expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "millis",
         check: Check::Both,
     }
@@ -932,6 +1047,10 @@ async fn test_timestamp() {
         expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+
         column_name: "millis_timezoned",
         check: Check::Both,
     }
@@ -954,6 +1073,10 @@ async fn test_timestamp() {
         ])),
         expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+
         column_name: "seconds",
         check: Check::Both,
     }
@@ -983,6 +1106,10 @@ async fn test_timestamp() {
         expected_null_counts: UInt64Array::from(vec![1, 1, 1, 1]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+
         column_name: "seconds_timezoned",
         check: Check::Both,
     }
@@ -1029,6 +1156,9 @@ async fn test_timestamp_diff_rg_sizes() {
         expected_null_counts: UInt64Array::from(vec![1, 2, 1]),
         // row counts are [8, 8, 4]
         expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "nanos",
         check: Check::Both,
     }
@@ -1056,6 +1186,9 @@ async fn test_timestamp_diff_rg_sizes() {
         expected_null_counts: UInt64Array::from(vec![1, 2, 1]),
         // row counts are [8, 8, 4]
         expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "nanos_timezoned",
         check: Check::Both,
     }
@@ -1076,6 +1209,9 @@ async fn test_timestamp_diff_rg_sizes() {
         ])),
         expected_null_counts: UInt64Array::from(vec![1, 2, 1]),
         expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "micros",
         check: Check::Both,
     }
@@ -1103,6 +1239,9 @@ async fn test_timestamp_diff_rg_sizes() {
         expected_null_counts: UInt64Array::from(vec![1, 2, 1]),
         // row counts are [8, 8, 4]
         expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "micros_timezoned",
         check: Check::Both,
     }
@@ -1123,6 +1262,9 @@ async fn test_timestamp_diff_rg_sizes() {
         ])),
         expected_null_counts: UInt64Array::from(vec![1, 2, 1]),
         expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "millis",
         check: Check::Both,
     }
@@ -1150,6 +1292,9 @@ async fn test_timestamp_diff_rg_sizes() {
         expected_null_counts: UInt64Array::from(vec![1, 2, 1]),
         // row counts are [8, 8, 4]
         expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "millis_timezoned",
         check: Check::Both,
     }
@@ -1170,6 +1315,9 @@ async fn test_timestamp_diff_rg_sizes() {
         ])),
         expected_null_counts: UInt64Array::from(vec![1, 2, 1]),
         expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "seconds",
         check: Check::Both,
     }
@@ -1197,6 +1345,9 @@ async fn test_timestamp_diff_rg_sizes() {
         expected_null_counts: UInt64Array::from(vec![1, 2, 1]),
         // row counts are [8, 8, 4]
         expected_row_counts: Some(UInt64Array::from(vec![8, 8, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "seconds_timezoned",
         check: Check::Both,
     }
@@ -1235,6 +1386,9 @@ async fn test_dates_32_diff_rg_sizes() {
         expected_null_counts: UInt64Array::from(vec![2, 2]),
         // row counts are [13, 7]
         expected_row_counts: Some(UInt64Array::from(vec![13, 7])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "date32",
         check: Check::Both,
     }
@@ -1258,6 +1412,9 @@ async fn test_time32_second_diff_rg_sizes() {
         expected_max: Arc::new(Time32SecondArray::from(vec![18509, 18513, 
18517, 18521])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), // Assuming 
1 null per row group for simplicity
         expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "second",
         check: Check::Both,
     }
@@ -1285,6 +1442,9 @@ async fn test_time32_millisecond_diff_rg_sizes() {
         ])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), // Assuming 
1 null per row group for simplicity
         expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "millisecond",
         check: Check::Both,
     }
@@ -1318,6 +1478,9 @@ async fn test_time64_microsecond_diff_rg_sizes() {
         ])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), // Assuming 
1 null per row group for simplicity
         expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "microsecond",
         check: Check::Both,
     }
@@ -1351,6 +1514,9 @@ async fn test_time64_nanosecond_diff_rg_sizes() {
         ])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]), // Assuming 
1 null per row group for simplicity
         expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true]),
         column_name: "nanosecond",
         check: Check::Both,
     }
@@ -1378,6 +1544,9 @@ async fn test_dates_64_diff_rg_sizes() {
         ])),
         expected_null_counts: UInt64Array::from(vec![2, 2]),
         expected_row_counts: Some(UInt64Array::from(vec![13, 7])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "date64",
         check: Check::Both,
     }
@@ -1406,6 +1575,9 @@ async fn test_uint() {
         expected_max: Arc::new(UInt8Array::from(vec![3, 4, 6, 250, 254])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true, true]),
         column_name: "u8",
         check: Check::Both,
     }
@@ -1417,6 +1589,9 @@ async fn test_uint() {
         expected_max: Arc::new(UInt16Array::from(vec![3, 4, 6, 250, 254])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true, true]),
         column_name: "u16",
         check: Check::Both,
     }
@@ -1428,6 +1603,9 @@ async fn test_uint() {
         expected_max: Arc::new(UInt32Array::from(vec![3, 4, 6, 250, 254])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true, true]),
         column_name: "u32",
         check: Check::Both,
     }
@@ -1439,6 +1617,9 @@ async fn test_uint() {
         expected_max: Arc::new(UInt64Array::from(vec![3, 4, 6, 250, 254])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4, 4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, 
true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, 
true, true]),
         column_name: "u64",
         check: Check::Both,
     }
@@ -1462,6 +1643,9 @@ async fn test_int32_range() {
         expected_max: Arc::new(Int32Array::from(vec![300000])),
         expected_null_counts: UInt64Array::from(vec![0]),
         expected_row_counts: Some(UInt64Array::from(vec![4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true]),
+        expected_min_value_exact: BooleanArray::from(vec![true]),
         column_name: "i",
         check: Check::Both,
     }
@@ -1485,6 +1669,9 @@ async fn test_uint32_range() {
         expected_max: Arc::new(UInt32Array::from(vec![300000])),
         expected_null_counts: UInt64Array::from(vec![0]),
         expected_row_counts: Some(UInt64Array::from(vec![4])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true]),
+        expected_min_value_exact: BooleanArray::from(vec![true]),
         column_name: "u",
         check: Check::Both,
     }
@@ -1507,6 +1694,9 @@ async fn test_numeric_limits_unsigned() {
         expected_max: Arc::new(UInt8Array::from(vec![100, u8::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "u8",
         check: Check::Both,
     }
@@ -1518,6 +1708,9 @@ async fn test_numeric_limits_unsigned() {
         expected_max: Arc::new(UInt16Array::from(vec![100, u16::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "u16",
         check: Check::Both,
     }
@@ -1529,6 +1722,9 @@ async fn test_numeric_limits_unsigned() {
         expected_max: Arc::new(UInt32Array::from(vec![100, u32::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "u32",
         check: Check::Both,
     }
@@ -1540,6 +1736,9 @@ async fn test_numeric_limits_unsigned() {
         expected_max: Arc::new(UInt64Array::from(vec![100, u64::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "u64",
         check: Check::Both,
     }
@@ -1562,6 +1761,9 @@ async fn test_numeric_limits_signed() {
         expected_max: Arc::new(Int8Array::from(vec![100, i8::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "i8",
         check: Check::Both,
     }
@@ -1573,6 +1775,9 @@ async fn test_numeric_limits_signed() {
         expected_max: Arc::new(Int16Array::from(vec![100, i16::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "i16",
         check: Check::Both,
     }
@@ -1584,6 +1789,9 @@ async fn test_numeric_limits_signed() {
         expected_max: Arc::new(Int32Array::from(vec![100, i32::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "i32",
         check: Check::Both,
     }
@@ -1595,6 +1803,9 @@ async fn test_numeric_limits_signed() {
         expected_max: Arc::new(Int64Array::from(vec![100, i64::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "i64",
         check: Check::Both,
     }
@@ -1617,6 +1828,9 @@ async fn test_numeric_limits_float() {
         expected_max: Arc::new(Float32Array::from(vec![100.0, f32::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "f32",
         check: Check::Both,
     }
@@ -1628,6 +1842,9 @@ async fn test_numeric_limits_float() {
         expected_max: Arc::new(Float64Array::from(vec![100.0, f64::MAX])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "f64",
         check: Check::Both,
     }
@@ -1639,6 +1856,9 @@ async fn test_numeric_limits_float() {
         expected_max: Arc::new(Float32Array::from(vec![100.0, -100.0])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "f32_nan",
         check: Check::Both,
     }
@@ -1650,6 +1870,9 @@ async fn test_numeric_limits_float() {
         expected_max: Arc::new(Float64Array::from(vec![100.0, -100.0])),
         expected_null_counts: UInt64Array::from(vec![0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "f64_nan",
         check: Check::Both,
     }
@@ -1673,6 +1896,10 @@ async fn test_float64() {
         expected_max: Arc::new(Float64Array::from(vec![-1.0, 0.0, 4.0, 9.0])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, true]),
+
         column_name: "f",
         check: Check::Both,
     }
@@ -1706,6 +1933,10 @@ async fn test_float16() {
         )),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true, true]),
+
         column_name: "f",
         check: Check::Both,
     }
@@ -1737,6 +1968,9 @@ async fn test_decimal() {
         ),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "decimal_col",
         check: Check::Both,
     }
@@ -1767,6 +2001,9 @@ async fn test_decimal_256() {
         ),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "decimal256_col",
         check: Check::Both,
     }
@@ -1787,6 +2024,9 @@ async fn test_dictionary() {
         expected_max: Arc::new(StringArray::from(vec!["def", "fffff"])),
         expected_null_counts: UInt64Array::from(vec![1, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "string_dict_i8",
         check: Check::Both,
     }
@@ -1798,6 +2038,9 @@ async fn test_dictionary() {
         expected_max: Arc::new(StringArray::from(vec!["def", "fffff"])),
         expected_null_counts: UInt64Array::from(vec![1, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "string_dict_i32",
         check: Check::Both,
     }
@@ -1809,6 +2052,9 @@ async fn test_dictionary() {
         expected_max: Arc::new(Int64Array::from(vec![0, 100])),
         expected_null_counts: UInt64Array::from(vec![1, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "int_dict_i8",
         check: Check::Both,
     }
@@ -1847,6 +2093,9 @@ async fn test_byte() {
         ])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "name",
         check: Check::Both,
     }
@@ -1867,6 +2116,9 @@ async fn test_byte() {
         ])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "service_string",
         check: Check::Both,
     }
@@ -1886,6 +2138,9 @@ async fn test_byte() {
         expected_max: Arc::new(BinaryArray::from(expected_service_binary_max_values)),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "service_binary",
         check: Check::Both,
     }
@@ -1903,6 +2158,9 @@ async fn test_byte() {
         expected_max: Arc::new(FixedSizeBinaryArray::try_from_iter(max_input.into_iter()).unwrap()),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "service_fixedsize",
         check: Check::Both,
     }
@@ -1924,6 +2182,9 @@ async fn test_byte() {
         )),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "service_large_binary",
         check: Check::Both,
     }
@@ -1957,6 +2218,9 @@ async fn test_period_in_column_names() {
         ])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "name",
         check: Check::Both,
     }
@@ -1969,6 +2233,9 @@ async fn test_period_in_column_names() {
         expected_max: Arc::new(StringArray::from(vec!["frontend", "frontend", "backend"])),
         expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "service.name",
         check: Check::Both,
     }
@@ -1993,6 +2260,9 @@ async fn test_boolean() {
         expected_max: Arc::new(BooleanArray::from(vec![true, false])),
         expected_null_counts: UInt64Array::from(vec![1, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "bool",
         check: Check::Both,
     }
@@ -2020,6 +2290,8 @@ async fn test_struct() {
         expected_max: Arc::new(struct_array(vec![(Some(2), Some(8.5), Some(14.0))])),
         expected_null_counts: UInt64Array::from(vec![0]),
         expected_row_counts: Some(UInt64Array::from(vec![3])),
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "struct",
         check: Check::RowGroup,
     }
@@ -2043,6 +2315,9 @@ async fn test_utf8() {
         expected_max: Arc::new(StringArray::from(vec!["d", "i"])),
         expected_null_counts: UInt64Array::from(vec![1, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "utf8",
         check: Check::Both,
     }
@@ -2055,6 +2330,9 @@ async fn test_utf8() {
         expected_max: Arc::new(LargeStringArray::from(vec!["d", "i"])),
         expected_null_counts: UInt64Array::from(vec![1, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "large_utf8",
         check: Check::Both,
     }
@@ -2082,6 +2360,9 @@ async fn test_utf8_view() {
         ])),
         expected_null_counts: UInt64Array::from(vec![1, 3, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "utf8_view",
         check: Check::Both,
     }
@@ -2109,6 +2390,9 @@ async fn test_binary_view() {
         expected_max: Arc::new(BinaryViewArray::from(expected_max)),
         expected_null_counts: UInt64Array::from(vec![1, 3, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
         column_name: "binary_view",
         check: Check::Both,
     }
@@ -2135,6 +2419,8 @@ async fn test_missing_statistics() {
         expected_max: Arc::new(Int64Array::from(vec![None])),
         expected_null_counts: UInt64Array::from(vec![None]),
         expected_row_counts: Some(UInt64Array::from(vec![3])), // still has row count statistics
+        expected_max_value_exact: BooleanArray::from(vec![None]),
+        expected_min_value_exact: BooleanArray::from(vec![None]),
         column_name: "i64",
         check: Check::Both,
     }
@@ -2216,6 +2502,9 @@ async fn test_column_not_found() {
         expected_max: Arc::new(Int64Array::from(vec![18564, 21865])),
         expected_null_counts: UInt64Array::from(vec![2, 2]),
         expected_row_counts: Some(UInt64Array::from(vec![13, 7])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true]),
         column_name: "not_a_column",
         check: Check::Both,
     }
@@ -2251,6 +2540,8 @@ async fn test_column_non_existent() {
         expected_null_counts: UInt64Array::from(vec![None, None, None, None]),
         // row counts are [5, 5, 5, 5]
         expected_row_counts: None,
+        expected_max_value_exact: BooleanArray::from(vec![None, None, None, None]),
+        expected_min_value_exact: BooleanArray::from(vec![None, None, None, None]),
         column_name: "i_do_not_exist",
         check: Check::Both,
     }

(arrow-rs) branch main updated: feat: add `row_group_is_[max/min]_value_exact` to StatisticsConverter (#7574)

Reply via email to