This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 69b17ad402 parquet Statistics - deprecate `has_*` APIs and add `_opt`
functions that return `Option<T>` (#6216)
69b17ad402 is described below
commit 69b17ad40294003b9dfd25d85f2523abd1a0034e
Author: Michael J Ward <[email protected]>
AuthorDate: Thu Aug 15 12:00:55 2024 -0500
parquet Statistics - deprecate `has_*` APIs and add `_opt` functions that
return `Option<T>` (#6216)
* update public api Statistics::min to return an option.
I first re-named the existing method to `min_unchecked` and made it
internal to the crate.
I then added a `pub min(&self) -> Opiton<&T>` method.
I figure we can first change the public API before deciding what to do
about internal usage.
Ref: https://github.com/apache/arrow-rs/issues/6093
* update public api Statistics::max to return an option.
I first re-named the existing method to `max_unchecked` and made it
internal to the crate.
I then added a `pub max(&self) -> Opiton<&T>` method.
I figure we can first change the public API before deciding what to do
about internal usage.
Ref: https://github.com/apache/arrow-rs/issues/6093
* cargo fmt
* remove Statistics::has_min_max_set from the public api
Ref: https://github.com/apache/arrow-rs/issues/6093
* update impl HeapSize for ValueStatistics to use new min and max api
* migrate all tests to new Statistics min and max api
* make Statistics::null_count return Option<u64>
This removes ambiguity around whether the between all values are non-null
or just that the null count stat is missing
Ref: https://github.com/apache/arrow-rs/issues/6215
* update expected metadata memory size tests
Changing null_count from u64 to Option<u64> increases the memory size and
layout of the metadata.
I included these tests as a separate commit to call extra attention to it.
* add TODO question on is_min_max_backwards_compatible
* Apply suggestions from code review
Co-authored-by: Andrew Lamb <[email protected]>
* update ValueStatistics::max docs
* rename new optional ValueStatistics::max to max_opt
Per PR review, we will deprecate the old API instead of introducing a
brekaing change.
Ref:
https://github.com/apache/arrow-rs/pull/6216#pullrequestreview-2236537291
* rename new optional ValueStatistics::min to min_opt
* add Statistics:{min,max}_bytes_opt
This adds the API and migrates all of the test usage.
The old APIs will be deprecated next.
* update make_stats_iterator macro to use *_opt methods
* deprecate non *_opt Statistics and ValueStatistics methods
* remove stale TODO comments
* remove has_min_max_set check from make_decimal_stats_iterator
The check is unnecessary now that the stats funcs return Option<T> when
unset.
* deprecate has_min_max_set
An internal version was also created because it is used so extensively in
testing.
* switch to null_count_opt and reintroduce deprecated null_count and
has_nulls
* remove redundant test assertions of stats._internal_has_min_max_set
This removes the assertion from any test that subsequently unwraps both
min_opt and max_opt.
* replace negated test assertions of stats._internal_has_mix_max_set with
assertions on min_opt and max_opt
This removes all use of Statistics::_internal_has_min_max_set from the code
base, and so it is also removed.
* Revert changes to parquet writing, update comments
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
parquet/src/arrow/arrow_reader/statistics.rs | 109 ++++++---
parquet/src/arrow/arrow_writer/mod.rs | 32 ++-
parquet/src/column/page.rs | 10 +-
parquet/src/column/writer/mod.rs | 322 +++++++++++++-------------
parquet/src/file/metadata/memory.rs | 10 +-
parquet/src/file/metadata/mod.rs | 14 +-
parquet/src/file/serialized_reader.rs | 5 +-
parquet/src/file/statistics.rs | 332 +++++++++++++++++++--------
parquet/src/file/writer.rs | 6 +-
9 files changed, 521 insertions(+), 319 deletions(-)
diff --git a/parquet/src/arrow/arrow_reader/statistics.rs
b/parquet/src/arrow/arrow_reader/statistics.rs
index d487967c23..602a9ad5e5 100644
--- a/parquet/src/arrow/arrow_reader/statistics.rs
+++ b/parquet/src/arrow/arrow_reader/statistics.rs
@@ -116,7 +116,7 @@ macro_rules! make_stats_iterator {
let next = self.iter.next();
next.map(|x| {
x.and_then(|stats| match stats {
- $parquet_statistics_type(s) if stats.has_min_max_set()
=> Some(s.$func()),
+ $parquet_statistics_type(s) => s.$func(),
_ => None,
})
})
@@ -131,45 +131,85 @@ macro_rules! make_stats_iterator {
make_stats_iterator!(
MinBooleanStatsIterator,
- min,
+ min_opt,
ParquetStatistics::Boolean,
bool
);
make_stats_iterator!(
MaxBooleanStatsIterator,
- max,
+ max_opt,
ParquetStatistics::Boolean,
bool
);
-make_stats_iterator!(MinInt32StatsIterator, min, ParquetStatistics::Int32,
i32);
-make_stats_iterator!(MaxInt32StatsIterator, max, ParquetStatistics::Int32,
i32);
-make_stats_iterator!(MinInt64StatsIterator, min, ParquetStatistics::Int64,
i64);
-make_stats_iterator!(MaxInt64StatsIterator, max, ParquetStatistics::Int64,
i64);
-make_stats_iterator!(MinFloatStatsIterator, min, ParquetStatistics::Float,
f32);
-make_stats_iterator!(MaxFloatStatsIterator, max, ParquetStatistics::Float,
f32);
-make_stats_iterator!(MinDoubleStatsIterator, min, ParquetStatistics::Double,
f64);
-make_stats_iterator!(MaxDoubleStatsIterator, max, ParquetStatistics::Double,
f64);
+make_stats_iterator!(
+ MinInt32StatsIterator,
+ min_opt,
+ ParquetStatistics::Int32,
+ i32
+);
+make_stats_iterator!(
+ MaxInt32StatsIterator,
+ max_opt,
+ ParquetStatistics::Int32,
+ i32
+);
+make_stats_iterator!(
+ MinInt64StatsIterator,
+ min_opt,
+ ParquetStatistics::Int64,
+ i64
+);
+make_stats_iterator!(
+ MaxInt64StatsIterator,
+ max_opt,
+ ParquetStatistics::Int64,
+ i64
+);
+make_stats_iterator!(
+ MinFloatStatsIterator,
+ min_opt,
+ ParquetStatistics::Float,
+ f32
+);
+make_stats_iterator!(
+ MaxFloatStatsIterator,
+ max_opt,
+ ParquetStatistics::Float,
+ f32
+);
+make_stats_iterator!(
+ MinDoubleStatsIterator,
+ min_opt,
+ ParquetStatistics::Double,
+ f64
+);
+make_stats_iterator!(
+ MaxDoubleStatsIterator,
+ max_opt,
+ ParquetStatistics::Double,
+ f64
+);
make_stats_iterator!(
MinByteArrayStatsIterator,
- min_bytes,
+ min_bytes_opt,
ParquetStatistics::ByteArray,
[u8]
);
make_stats_iterator!(
MaxByteArrayStatsIterator,
- max_bytes,
+ max_bytes_opt,
ParquetStatistics::ByteArray,
[u8]
);
make_stats_iterator!(
MinFixedLenByteArrayStatsIterator,
- min_bytes,
+ min_bytes_opt,
ParquetStatistics::FixedLenByteArray,
[u8]
);
make_stats_iterator!(
MaxFixedLenByteArrayStatsIterator,
- max_bytes,
+ max_bytes_opt,
ParquetStatistics::FixedLenByteArray,
[u8]
);
@@ -218,19 +258,18 @@ macro_rules! make_decimal_stats_iterator {
fn next(&mut self) -> Option<Self::Item> {
let next = self.iter.next();
next.map(|x| {
- x.and_then(|stats| {
- if !stats.has_min_max_set() {
- return None;
+ x.and_then(|stats| match stats {
+ ParquetStatistics::Int32(s) => {
+ s.$func().map(|x| $stat_value_type::from(*x))
}
- match stats {
- ParquetStatistics::Int32(s) =>
Some($stat_value_type::from(*s.$func())),
- ParquetStatistics::Int64(s) =>
Some($stat_value_type::from(*s.$func())),
- ParquetStatistics::ByteArray(s) =>
Some($convert_func(s.$bytes_func())),
- ParquetStatistics::FixedLenByteArray(s) => {
- Some($convert_func(s.$bytes_func()))
- }
- _ => None,
+ ParquetStatistics::Int64(s) => {
+ s.$func().map(|x| $stat_value_type::from(*x))
}
+ ParquetStatistics::ByteArray(s) =>
s.$bytes_func().map($convert_func),
+ ParquetStatistics::FixedLenByteArray(s) => {
+ s.$bytes_func().map($convert_func)
+ }
+ _ => None,
})
})
}
@@ -244,29 +283,29 @@ macro_rules! make_decimal_stats_iterator {
make_decimal_stats_iterator!(
MinDecimal128StatsIterator,
- min,
- min_bytes,
+ min_opt,
+ min_bytes_opt,
i128,
from_bytes_to_i128
);
make_decimal_stats_iterator!(
MaxDecimal128StatsIterator,
- max,
- max_bytes,
+ max_opt,
+ max_bytes_opt,
i128,
from_bytes_to_i128
);
make_decimal_stats_iterator!(
MinDecimal256StatsIterator,
- min,
- min_bytes,
+ min_opt,
+ min_bytes_opt,
i256,
from_bytes_to_i256
);
make_decimal_stats_iterator!(
MaxDecimal256StatsIterator,
- max,
- max_bytes,
+ max_opt,
+ max_bytes_opt,
i256,
from_bytes_to_i256
);
@@ -1347,7 +1386,7 @@ impl<'a> StatisticsConverter<'a> {
let null_counts = metadatas
.into_iter()
.map(|x| x.column(parquet_index).statistics())
- .map(|s| s.map(|s| s.null_count()));
+ .map(|s| s.and_then(|s| s.null_count_opt()));
Ok(UInt64Array::from_iter(null_counts))
}
diff --git a/parquet/src/arrow/arrow_writer/mod.rs
b/parquet/src/arrow/arrow_writer/mod.rs
index 0c07f541bd..5c5f28ac0f 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -2541,10 +2541,15 @@ mod tests {
row_offset += column.num_values() as usize;
let stats = column.statistics().unwrap();
- assert!(stats.has_min_max_set());
if let Statistics::Int32(stats) = stats {
- assert_eq!(*stats.min() as u32,
*src_slice.iter().min().unwrap());
- assert_eq!(*stats.max() as u32,
*src_slice.iter().max().unwrap());
+ assert_eq!(
+ *stats.min_opt().unwrap() as u32,
+ *src_slice.iter().min().unwrap()
+ );
+ assert_eq!(
+ *stats.max_opt().unwrap() as u32,
+ *src_slice.iter().max().unwrap()
+ );
} else {
panic!("Statistics::Int32 missing")
}
@@ -2582,10 +2587,15 @@ mod tests {
row_offset += column.num_values() as usize;
let stats = column.statistics().unwrap();
- assert!(stats.has_min_max_set());
if let Statistics::Int64(stats) = stats {
- assert_eq!(*stats.min() as u64,
*src_slice.iter().min().unwrap());
- assert_eq!(*stats.max() as u64,
*src_slice.iter().max().unwrap());
+ assert_eq!(
+ *stats.min_opt().unwrap() as u64,
+ *src_slice.iter().min().unwrap()
+ );
+ assert_eq!(
+ *stats.max_opt().unwrap() as u64,
+ *src_slice.iter().max().unwrap()
+ );
} else {
panic!("Statistics::Int64 missing")
}
@@ -2608,7 +2618,7 @@ mod tests {
assert_eq!(row_group.num_columns(), 1);
let column = row_group.column(0);
let stats = column.statistics().unwrap();
- assert_eq!(stats.null_count(), 2);
+ assert_eq!(stats.null_count_opt(), Some(2));
}
}
@@ -3069,8 +3079,8 @@ mod tests {
// Column chunk of column "a" should have chunk level statistics
if let Statistics::ByteArray(byte_array_stats) =
a_col.statistics().unwrap() {
- let min = byte_array_stats.min();
- let max = byte_array_stats.max();
+ let min = byte_array_stats.min_opt().unwrap();
+ let max = byte_array_stats.max_opt().unwrap();
assert_eq!(min.as_bytes(), &[b'a']);
assert_eq!(max.as_bytes(), &[b'd']);
@@ -3141,8 +3151,8 @@ mod tests {
// Column chunk of column "a" should have chunk level statistics
if let Statistics::ByteArray(byte_array_stats) =
a_col.statistics().unwrap() {
- let min = byte_array_stats.min();
- let max = byte_array_stats.max();
+ let min = byte_array_stats.min_opt().unwrap();
+ let max = byte_array_stats.max_opt().unwrap();
assert_eq!(min.as_bytes(), &[b'a']);
assert_eq!(max.as_bytes(), &[b'd']);
diff --git a/parquet/src/column/page.rs b/parquet/src/column/page.rs
index 585d1951c1..e3931dfe9e 100644
--- a/parquet/src/column/page.rs
+++ b/parquet/src/column/page.rs
@@ -370,7 +370,7 @@ mod tests {
encoding: Encoding::PLAIN,
def_level_encoding: Encoding::RLE,
rep_level_encoding: Encoding::RLE,
- statistics: Some(Statistics::int32(Some(1), Some(2), None, 1,
true)),
+ statistics: Some(Statistics::int32(Some(1), Some(2), None,
Some(1), true)),
};
assert_eq!(data_page.page_type(), PageType::DATA_PAGE);
assert_eq!(data_page.buffer(), vec![0, 1, 2].as_slice());
@@ -378,7 +378,7 @@ mod tests {
assert_eq!(data_page.encoding(), Encoding::PLAIN);
assert_eq!(
data_page.statistics(),
- Some(&Statistics::int32(Some(1), Some(2), None, 1, true))
+ Some(&Statistics::int32(Some(1), Some(2), None, Some(1), true))
);
let data_page_v2 = Page::DataPageV2 {
@@ -390,7 +390,7 @@ mod tests {
def_levels_byte_len: 30,
rep_levels_byte_len: 40,
is_compressed: false,
- statistics: Some(Statistics::int32(Some(1), Some(2), None, 1,
true)),
+ statistics: Some(Statistics::int32(Some(1), Some(2), None,
Some(1), true)),
};
assert_eq!(data_page_v2.page_type(), PageType::DATA_PAGE_V2);
assert_eq!(data_page_v2.buffer(), vec![0, 1, 2].as_slice());
@@ -398,7 +398,7 @@ mod tests {
assert_eq!(data_page_v2.encoding(), Encoding::PLAIN);
assert_eq!(
data_page_v2.statistics(),
- Some(&Statistics::int32(Some(1), Some(2), None, 1, true))
+ Some(&Statistics::int32(Some(1), Some(2), None, Some(1), true))
);
let dict_page = Page::DictionaryPage {
@@ -422,7 +422,7 @@ mod tests {
encoding: Encoding::PLAIN,
def_level_encoding: Encoding::RLE,
rep_level_encoding: Encoding::RLE,
- statistics: Some(Statistics::int32(Some(1), Some(2), None, 1,
true)),
+ statistics: Some(Statistics::int32(Some(1), Some(2), None,
Some(1), true)),
};
let cpage = CompressedPage::new(data_page, 5);
diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index 519a219f94..8ea2878317 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -769,8 +769,8 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
}
Some(stat) => {
// Check if min/max are still ascending/descending across
pages
- let new_min = stat.min();
- let new_max = stat.max();
+ let new_min = stat.min_opt().unwrap();
+ let new_max = stat.max_opt().unwrap();
if let Some((last_min, last_max)) =
&self.last_non_null_data_page_min_max {
if self.data_page_boundary_ascending {
// If last min/max are greater than new min/max
then not ascending anymore
@@ -797,12 +797,12 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a,
E> {
null_page,
self.truncate_min_value(
self.props.column_index_truncate_length(),
- stat.min_bytes(),
+ stat.min_bytes_opt().unwrap(),
)
.0,
self.truncate_max_value(
self.props.column_index_truncate_length(),
- stat.max_bytes(),
+ stat.max_bytes_opt().unwrap(),
)
.0,
self.page_metrics.num_page_nulls as i64,
@@ -810,8 +810,8 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
} else {
self.column_index_builder.append(
null_page,
- stat.min_bytes().to_vec(),
- stat.max_bytes().to_vec(),
+ stat.min_bytes_opt().unwrap().to_vec(),
+ stat.max_bytes_opt().unwrap().to_vec(),
self.page_metrics.num_page_nulls as i64,
);
}
@@ -897,7 +897,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
Some(min),
Some(max),
None,
- self.page_metrics.num_page_nulls,
+ Some(self.page_metrics.num_page_nulls),
false,
),
)
@@ -1066,28 +1066,28 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a,
E> {
self.column_metrics.min_column_value.clone(),
self.column_metrics.max_column_value.clone(),
self.column_metrics.column_distinct_count,
- self.column_metrics.num_column_nulls,
+ Some(self.column_metrics.num_column_nulls),
false,
)
.with_backwards_compatible_min_max(backwards_compatible_min_max)
.into();
let statistics = match statistics {
- Statistics::ByteArray(stats) if stats.has_min_max_set() => {
+ Statistics::ByteArray(stats) if
stats._internal_has_min_max_set() => {
let (min, did_truncate_min) = self.truncate_min_value(
self.props.statistics_truncate_length(),
- stats.min_bytes(),
+ stats.min_bytes_opt().unwrap(),
);
let (max, did_truncate_max) = self.truncate_max_value(
self.props.statistics_truncate_length(),
- stats.max_bytes(),
+ stats.max_bytes_opt().unwrap(),
);
Statistics::ByteArray(
ValueStatistics::new(
Some(min.into()),
Some(max.into()),
stats.distinct_count(),
- stats.null_count(),
+ stats.null_count_opt(),
backwards_compatible_min_max,
)
.with_max_is_exact(!did_truncate_max)
@@ -1095,22 +1095,22 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a,
E> {
)
}
Statistics::FixedLenByteArray(stats)
- if (stats.has_min_max_set() && self.can_truncate_value())
=>
+ if (stats._internal_has_min_max_set() &&
self.can_truncate_value()) =>
{
let (min, did_truncate_min) = self.truncate_min_value(
self.props.statistics_truncate_length(),
- stats.min_bytes(),
+ stats.min_bytes_opt().unwrap(),
);
let (max, did_truncate_max) = self.truncate_max_value(
self.props.statistics_truncate_length(),
- stats.max_bytes(),
+ stats.max_bytes_opt().unwrap(),
);
Statistics::FixedLenByteArray(
ValueStatistics::new(
Some(min.into()),
Some(max.into()),
stats.distinct_count(),
- stats.null_count(),
+ stats.null_count_opt(),
backwards_compatible_min_max,
)
.with_max_is_exact(!did_truncate_max)
@@ -1841,12 +1841,11 @@ mod tests {
assert_eq!(metadata.data_page_offset(), 0);
assert_eq!(metadata.dictionary_page_offset(), Some(0));
if let Some(stats) = metadata.statistics() {
- assert!(stats.has_min_max_set());
- assert_eq!(stats.null_count(), 0);
+ assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count(), None);
if let Statistics::Int32(stats) = stats {
- assert_eq!(stats.min(), &1);
- assert_eq!(stats.max(), &4);
+ assert_eq!(stats.min_opt().unwrap(), &1);
+ assert_eq!(stats.max_opt().unwrap(), &4);
} else {
panic!("expecting Statistics::Int32");
}
@@ -1886,17 +1885,16 @@ mod tests {
.unwrap();
let metadata = writer.close().unwrap().metadata;
if let Some(stats) = metadata.statistics() {
- assert!(stats.has_min_max_set());
if let Statistics::ByteArray(stats) = stats {
assert_eq!(
- stats.min(),
+ stats.min_opt().unwrap(),
&ByteArray::from(vec![
255u8, 255u8, 255u8, 255u8, 255u8, 255u8, 255u8,
255u8, 179u8, 172u8, 19u8,
35u8, 231u8, 90u8, 0u8, 0u8,
])
);
assert_eq!(
- stats.max(),
+ stats.max_opt().unwrap(),
&ByteArray::from(vec![
0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 41u8, 162u8,
36u8, 26u8, 246u8,
44u8, 0u8, 0u8,
@@ -1923,10 +1921,9 @@ mod tests {
writer.write_batch(&[0, 1, 2, 3, 4, 5], None, None).unwrap();
let metadata = writer.close().unwrap().metadata;
if let Some(stats) = metadata.statistics() {
- assert!(stats.has_min_max_set());
if let Statistics::Int32(stats) = stats {
- assert_eq!(stats.min(), &0,);
- assert_eq!(stats.max(), &5,);
+ assert_eq!(stats.min_opt().unwrap(), &0,);
+ assert_eq!(stats.max_opt().unwrap(), &5,);
} else {
panic!("expecting Statistics::Int32");
}
@@ -1970,12 +1967,11 @@ mod tests {
assert_eq!(metadata.data_page_offset(), 0);
assert_eq!(metadata.dictionary_page_offset(), Some(0));
if let Some(stats) = metadata.statistics() {
- assert!(stats.has_min_max_set());
- assert_eq!(stats.null_count(), 0);
+ assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count().unwrap_or(0), 55);
if let Statistics::Int32(stats) = stats {
- assert_eq!(stats.min(), &-17);
- assert_eq!(stats.max(), &9000);
+ assert_eq!(stats.min_opt().unwrap(), &-17);
+ assert_eq!(stats.max_opt().unwrap(), &9000);
} else {
panic!("expecting Statistics::Int32");
}
@@ -2000,9 +1996,9 @@ mod tests {
let r = writer.close().unwrap();
let stats = r.metadata.statistics().unwrap();
- assert_eq!(stats.min_bytes(), 1_i32.to_le_bytes());
- assert_eq!(stats.max_bytes(), 7_i32.to_le_bytes());
- assert_eq!(stats.null_count(), 0);
+ assert_eq!(stats.min_bytes_opt().unwrap(), 1_i32.to_le_bytes());
+ assert_eq!(stats.max_bytes_opt().unwrap(), 7_i32.to_le_bytes());
+ assert_eq!(stats.null_count_opt(), Some(0));
assert!(stats.distinct_count().is_none());
drop(write);
@@ -2026,9 +2022,15 @@ mod tests {
assert_eq!(pages[1].page_type(), PageType::DATA_PAGE);
let page_statistics = pages[1].statistics().unwrap();
- assert_eq!(page_statistics.min_bytes(), 1_i32.to_le_bytes());
- assert_eq!(page_statistics.max_bytes(), 7_i32.to_le_bytes());
- assert_eq!(page_statistics.null_count(), 0);
+ assert_eq!(
+ page_statistics.min_bytes_opt().unwrap(),
+ 1_i32.to_le_bytes()
+ );
+ assert_eq!(
+ page_statistics.max_bytes_opt().unwrap(),
+ 7_i32.to_le_bytes()
+ );
+ assert_eq!(page_statistics.null_count_opt(), Some(0));
assert!(page_statistics.distinct_count().is_none());
}
@@ -2217,13 +2219,12 @@ mod tests {
#[test]
fn test_bool_statistics() {
let stats = statistics_roundtrip::<BoolType>(&[true, false, false,
true]);
- assert!(stats.has_min_max_set());
// Booleans have an unsigned sort order and so are not compatible
// with the deprecated `min` and `max` statistics
assert!(!stats.is_min_max_backwards_compatible());
if let Statistics::Boolean(stats) = stats {
- assert_eq!(stats.min(), &false);
- assert_eq!(stats.max(), &true);
+ assert_eq!(stats.min_opt().unwrap(), &false);
+ assert_eq!(stats.max_opt().unwrap(), &true);
} else {
panic!("expecting Statistics::Boolean, got {stats:?}");
}
@@ -2232,11 +2233,10 @@ mod tests {
#[test]
fn test_int32_statistics() {
let stats = statistics_roundtrip::<Int32Type>(&[-1, 3, -2, 2]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Int32(stats) = stats {
- assert_eq!(stats.min(), &-2);
- assert_eq!(stats.max(), &3);
+ assert_eq!(stats.min_opt().unwrap(), &-2);
+ assert_eq!(stats.max_opt().unwrap(), &3);
} else {
panic!("expecting Statistics::Int32, got {stats:?}");
}
@@ -2245,11 +2245,10 @@ mod tests {
#[test]
fn test_int64_statistics() {
let stats = statistics_roundtrip::<Int64Type>(&[-1, 3, -2, 2]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Int64(stats) = stats {
- assert_eq!(stats.min(), &-2);
- assert_eq!(stats.max(), &3);
+ assert_eq!(stats.min_opt().unwrap(), &-2);
+ assert_eq!(stats.max_opt().unwrap(), &3);
} else {
panic!("expecting Statistics::Int64, got {stats:?}");
}
@@ -2267,11 +2266,10 @@ mod tests {
.collect::<Vec<Int96>>();
let stats = statistics_roundtrip::<Int96Type>(&input);
- assert!(stats.has_min_max_set());
assert!(!stats.is_min_max_backwards_compatible());
if let Statistics::Int96(stats) = stats {
- assert_eq!(stats.min(), &Int96::from(vec![0, 20, 30]));
- assert_eq!(stats.max(), &Int96::from(vec![3, 20, 10]));
+ assert_eq!(stats.min_opt().unwrap(), &Int96::from(vec![0, 20,
30]));
+ assert_eq!(stats.max_opt().unwrap(), &Int96::from(vec![3, 20,
10]));
} else {
panic!("expecting Statistics::Int96, got {stats:?}");
}
@@ -2280,11 +2278,10 @@ mod tests {
#[test]
fn test_float_statistics() {
let stats = statistics_roundtrip::<FloatType>(&[-1.0, 3.0, -2.0, 2.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Float(stats) = stats {
- assert_eq!(stats.min(), &-2.0);
- assert_eq!(stats.max(), &3.0);
+ assert_eq!(stats.min_opt().unwrap(), &-2.0);
+ assert_eq!(stats.max_opt().unwrap(), &3.0);
} else {
panic!("expecting Statistics::Float, got {stats:?}");
}
@@ -2293,11 +2290,10 @@ mod tests {
#[test]
fn test_double_statistics() {
let stats = statistics_roundtrip::<DoubleType>(&[-1.0, 3.0, -2.0,
2.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Double(stats) = stats {
- assert_eq!(stats.min(), &-2.0);
- assert_eq!(stats.max(), &3.0);
+ assert_eq!(stats.min_opt().unwrap(), &-2.0);
+ assert_eq!(stats.max_opt().unwrap(), &3.0);
} else {
panic!("expecting Statistics::Double, got {stats:?}");
}
@@ -2312,10 +2308,9 @@ mod tests {
let stats = statistics_roundtrip::<ByteArrayType>(&input);
assert!(!stats.is_min_max_backwards_compatible());
- assert!(stats.has_min_max_set());
if let Statistics::ByteArray(stats) = stats {
- assert_eq!(stats.min(), &ByteArray::from("aaw"));
- assert_eq!(stats.max(), &ByteArray::from("zz"));
+ assert_eq!(stats.min_opt().unwrap(), &ByteArray::from("aaw"));
+ assert_eq!(stats.max_opt().unwrap(), &ByteArray::from("zz"));
} else {
panic!("expecting Statistics::ByteArray, got {stats:?}");
}
@@ -2329,13 +2324,12 @@ mod tests {
.collect::<Vec<_>>();
let stats = statistics_roundtrip::<FixedLenByteArrayType>(&input);
- assert!(stats.has_min_max_set());
assert!(!stats.is_min_max_backwards_compatible());
if let Statistics::FixedLenByteArray(stats) = stats {
let expected_min: FixedLenByteArray = ByteArray::from("aaw
").into();
- assert_eq!(stats.min(), &expected_min);
+ assert_eq!(stats.min_opt().unwrap(), &expected_min);
let expected_max: FixedLenByteArray = ByteArray::from("zz
").into();
- assert_eq!(stats.max(), &expected_max);
+ assert_eq!(stats.max_opt().unwrap(), &expected_max);
} else {
panic!("expecting Statistics::FixedLenByteArray, got {stats:?}");
}
@@ -2354,10 +2348,15 @@ mod tests {
.collect::<Vec<_>>();
let stats = float16_statistics_roundtrip(&input);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
- assert_eq!(stats.min(), &ByteArray::from(-f16::from_f32(2.0)));
- assert_eq!(stats.max(), &ByteArray::from(f16::from_f32(3.0)));
+ assert_eq!(
+ stats.min_opt().unwrap(),
+ &ByteArray::from(-f16::from_f32(2.0))
+ );
+ assert_eq!(
+ stats.max_opt().unwrap(),
+ &ByteArray::from(f16::from_f32(3.0))
+ );
}
#[test]
@@ -2368,10 +2367,12 @@ mod tests {
.collect::<Vec<_>>();
let stats = float16_statistics_roundtrip(&input);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
- assert_eq!(stats.min(), &ByteArray::from(f16::ONE));
- assert_eq!(stats.max(), &ByteArray::from(f16::ONE + f16::ONE));
+ assert_eq!(stats.min_opt().unwrap(), &ByteArray::from(f16::ONE));
+ assert_eq!(
+ stats.max_opt().unwrap(),
+ &ByteArray::from(f16::ONE + f16::ONE)
+ );
}
#[test]
@@ -2382,10 +2383,12 @@ mod tests {
.collect::<Vec<_>>();
let stats = float16_statistics_roundtrip(&input);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
- assert_eq!(stats.min(), &ByteArray::from(f16::ONE));
- assert_eq!(stats.max(), &ByteArray::from(f16::ONE + f16::ONE));
+ assert_eq!(stats.min_opt().unwrap(), &ByteArray::from(f16::ONE));
+ assert_eq!(
+ stats.max_opt().unwrap(),
+ &ByteArray::from(f16::ONE + f16::ONE)
+ );
}
#[test]
@@ -2396,10 +2399,12 @@ mod tests {
.collect::<Vec<_>>();
let stats = float16_statistics_roundtrip(&input);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
- assert_eq!(stats.min(), &ByteArray::from(f16::ONE));
- assert_eq!(stats.max(), &ByteArray::from(f16::ONE + f16::ONE));
+ assert_eq!(stats.min_opt().unwrap(), &ByteArray::from(f16::ONE));
+ assert_eq!(
+ stats.max_opt().unwrap(),
+ &ByteArray::from(f16::ONE + f16::ONE)
+ );
}
#[test]
@@ -2410,7 +2415,8 @@ mod tests {
.collect::<Vec<_>>();
let stats = float16_statistics_roundtrip(&input);
- assert!(!stats.has_min_max_set());
+ assert!(stats.min_bytes_opt().is_none());
+ assert!(stats.max_bytes_opt().is_none());
assert!(stats.is_min_max_backwards_compatible());
}
@@ -2422,10 +2428,9 @@ mod tests {
.collect::<Vec<_>>();
let stats = float16_statistics_roundtrip(&input);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
- assert_eq!(stats.min(), &ByteArray::from(f16::NEG_ZERO));
- assert_eq!(stats.max(), &ByteArray::from(f16::ZERO));
+ assert_eq!(stats.min_opt().unwrap(), &ByteArray::from(f16::NEG_ZERO));
+ assert_eq!(stats.max_opt().unwrap(), &ByteArray::from(f16::ZERO));
}
#[test]
@@ -2436,10 +2441,9 @@ mod tests {
.collect::<Vec<_>>();
let stats = float16_statistics_roundtrip(&input);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
- assert_eq!(stats.min(), &ByteArray::from(f16::NEG_ZERO));
- assert_eq!(stats.max(), &ByteArray::from(f16::ZERO));
+ assert_eq!(stats.min_opt().unwrap(), &ByteArray::from(f16::NEG_ZERO));
+ assert_eq!(stats.max_opt().unwrap(), &ByteArray::from(f16::ZERO));
}
#[test]
@@ -2450,10 +2454,9 @@ mod tests {
.collect::<Vec<_>>();
let stats = float16_statistics_roundtrip(&input);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
- assert_eq!(stats.min(), &ByteArray::from(f16::NEG_ZERO));
- assert_eq!(stats.max(), &ByteArray::from(f16::PI));
+ assert_eq!(stats.min_opt().unwrap(), &ByteArray::from(f16::NEG_ZERO));
+ assert_eq!(stats.max_opt().unwrap(), &ByteArray::from(f16::PI));
}
#[test]
@@ -2464,20 +2467,18 @@ mod tests {
.collect::<Vec<_>>();
let stats = float16_statistics_roundtrip(&input);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
- assert_eq!(stats.min(), &ByteArray::from(-f16::PI));
- assert_eq!(stats.max(), &ByteArray::from(f16::ZERO));
+ assert_eq!(stats.min_opt().unwrap(), &ByteArray::from(-f16::PI));
+ assert_eq!(stats.max_opt().unwrap(), &ByteArray::from(f16::ZERO));
}
#[test]
fn test_float_statistics_nan_middle() {
let stats = statistics_roundtrip::<FloatType>(&[1.0, f32::NAN, 2.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Float(stats) = stats {
- assert_eq!(stats.min(), &1.0);
- assert_eq!(stats.max(), &2.0);
+ assert_eq!(stats.min_opt().unwrap(), &1.0);
+ assert_eq!(stats.max_opt().unwrap(), &2.0);
} else {
panic!("expecting Statistics::Float");
}
@@ -2486,11 +2487,10 @@ mod tests {
#[test]
fn test_float_statistics_nan_start() {
let stats = statistics_roundtrip::<FloatType>(&[f32::NAN, 1.0, 2.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Float(stats) = stats {
- assert_eq!(stats.min(), &1.0);
- assert_eq!(stats.max(), &2.0);
+ assert_eq!(stats.min_opt().unwrap(), &1.0);
+ assert_eq!(stats.max_opt().unwrap(), &2.0);
} else {
panic!("expecting Statistics::Float");
}
@@ -2499,7 +2499,8 @@ mod tests {
#[test]
fn test_float_statistics_nan_only() {
let stats = statistics_roundtrip::<FloatType>(&[f32::NAN, f32::NAN]);
- assert!(!stats.has_min_max_set());
+ assert!(stats.min_bytes_opt().is_none());
+ assert!(stats.max_bytes_opt().is_none());
assert!(stats.is_min_max_backwards_compatible());
assert!(matches!(stats, Statistics::Float(_)));
}
@@ -2507,13 +2508,12 @@ mod tests {
#[test]
fn test_float_statistics_zero_only() {
let stats = statistics_roundtrip::<FloatType>(&[0.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Float(stats) = stats {
- assert_eq!(stats.min(), &-0.0);
- assert!(stats.min().is_sign_negative());
- assert_eq!(stats.max(), &0.0);
- assert!(stats.max().is_sign_positive());
+ assert_eq!(stats.min_opt().unwrap(), &-0.0);
+ assert!(stats.min_opt().unwrap().is_sign_negative());
+ assert_eq!(stats.max_opt().unwrap(), &0.0);
+ assert!(stats.max_opt().unwrap().is_sign_positive());
} else {
panic!("expecting Statistics::Float");
}
@@ -2522,13 +2522,12 @@ mod tests {
#[test]
fn test_float_statistics_neg_zero_only() {
let stats = statistics_roundtrip::<FloatType>(&[-0.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Float(stats) = stats {
- assert_eq!(stats.min(), &-0.0);
- assert!(stats.min().is_sign_negative());
- assert_eq!(stats.max(), &0.0);
- assert!(stats.max().is_sign_positive());
+ assert_eq!(stats.min_opt().unwrap(), &-0.0);
+ assert!(stats.min_opt().unwrap().is_sign_negative());
+ assert_eq!(stats.max_opt().unwrap(), &0.0);
+ assert!(stats.max_opt().unwrap().is_sign_positive());
} else {
panic!("expecting Statistics::Float");
}
@@ -2537,12 +2536,11 @@ mod tests {
#[test]
fn test_float_statistics_zero_min() {
let stats = statistics_roundtrip::<FloatType>(&[0.0, 1.0, f32::NAN,
2.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Float(stats) = stats {
- assert_eq!(stats.min(), &-0.0);
- assert!(stats.min().is_sign_negative());
- assert_eq!(stats.max(), &2.0);
+ assert_eq!(stats.min_opt().unwrap(), &-0.0);
+ assert!(stats.min_opt().unwrap().is_sign_negative());
+ assert_eq!(stats.max_opt().unwrap(), &2.0);
} else {
panic!("expecting Statistics::Float");
}
@@ -2551,12 +2549,11 @@ mod tests {
#[test]
fn test_float_statistics_neg_zero_max() {
let stats = statistics_roundtrip::<FloatType>(&[-0.0, -1.0, f32::NAN,
-2.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Float(stats) = stats {
- assert_eq!(stats.min(), &-2.0);
- assert_eq!(stats.max(), &0.0);
- assert!(stats.max().is_sign_positive());
+ assert_eq!(stats.min_opt().unwrap(), &-2.0);
+ assert_eq!(stats.max_opt().unwrap(), &0.0);
+ assert!(stats.max_opt().unwrap().is_sign_positive());
} else {
panic!("expecting Statistics::Float");
}
@@ -2565,11 +2562,10 @@ mod tests {
#[test]
fn test_double_statistics_nan_middle() {
let stats = statistics_roundtrip::<DoubleType>(&[1.0, f64::NAN, 2.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Double(stats) = stats {
- assert_eq!(stats.min(), &1.0);
- assert_eq!(stats.max(), &2.0);
+ assert_eq!(stats.min_opt().unwrap(), &1.0);
+ assert_eq!(stats.max_opt().unwrap(), &2.0);
} else {
panic!("expecting Statistics::Double");
}
@@ -2578,11 +2574,10 @@ mod tests {
#[test]
fn test_double_statistics_nan_start() {
let stats = statistics_roundtrip::<DoubleType>(&[f64::NAN, 1.0, 2.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Double(stats) = stats {
- assert_eq!(stats.min(), &1.0);
- assert_eq!(stats.max(), &2.0);
+ assert_eq!(stats.min_opt().unwrap(), &1.0);
+ assert_eq!(stats.max_opt().unwrap(), &2.0);
} else {
panic!("expecting Statistics::Double");
}
@@ -2591,7 +2586,8 @@ mod tests {
#[test]
fn test_double_statistics_nan_only() {
let stats = statistics_roundtrip::<DoubleType>(&[f64::NAN, f64::NAN]);
- assert!(!stats.has_min_max_set());
+ assert!(stats.min_bytes_opt().is_none());
+ assert!(stats.max_bytes_opt().is_none());
assert!(matches!(stats, Statistics::Double(_)));
assert!(stats.is_min_max_backwards_compatible());
}
@@ -2599,13 +2595,12 @@ mod tests {
#[test]
fn test_double_statistics_zero_only() {
let stats = statistics_roundtrip::<DoubleType>(&[0.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Double(stats) = stats {
- assert_eq!(stats.min(), &-0.0);
- assert!(stats.min().is_sign_negative());
- assert_eq!(stats.max(), &0.0);
- assert!(stats.max().is_sign_positive());
+ assert_eq!(stats.min_opt().unwrap(), &-0.0);
+ assert!(stats.min_opt().unwrap().is_sign_negative());
+ assert_eq!(stats.max_opt().unwrap(), &0.0);
+ assert!(stats.max_opt().unwrap().is_sign_positive());
} else {
panic!("expecting Statistics::Double");
}
@@ -2614,13 +2609,12 @@ mod tests {
#[test]
fn test_double_statistics_neg_zero_only() {
let stats = statistics_roundtrip::<DoubleType>(&[-0.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Double(stats) = stats {
- assert_eq!(stats.min(), &-0.0);
- assert!(stats.min().is_sign_negative());
- assert_eq!(stats.max(), &0.0);
- assert!(stats.max().is_sign_positive());
+ assert_eq!(stats.min_opt().unwrap(), &-0.0);
+ assert!(stats.min_opt().unwrap().is_sign_negative());
+ assert_eq!(stats.max_opt().unwrap(), &0.0);
+ assert!(stats.max_opt().unwrap().is_sign_positive());
} else {
panic!("expecting Statistics::Double");
}
@@ -2629,12 +2623,11 @@ mod tests {
#[test]
fn test_double_statistics_zero_min() {
let stats = statistics_roundtrip::<DoubleType>(&[0.0, 1.0, f64::NAN,
2.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Double(stats) = stats {
- assert_eq!(stats.min(), &-0.0);
- assert!(stats.min().is_sign_negative());
- assert_eq!(stats.max(), &2.0);
+ assert_eq!(stats.min_opt().unwrap(), &-0.0);
+ assert!(stats.min_opt().unwrap().is_sign_negative());
+ assert_eq!(stats.max_opt().unwrap(), &2.0);
} else {
panic!("expecting Statistics::Double");
}
@@ -2643,12 +2636,11 @@ mod tests {
#[test]
fn test_double_statistics_neg_zero_max() {
let stats = statistics_roundtrip::<DoubleType>(&[-0.0, -1.0, f64::NAN,
-2.0]);
- assert!(stats.has_min_max_set());
assert!(stats.is_min_max_backwards_compatible());
if let Statistics::Double(stats) = stats {
- assert_eq!(stats.min(), &-2.0);
- assert_eq!(stats.max(), &0.0);
- assert!(stats.max().is_sign_positive());
+ assert_eq!(stats.min_opt().unwrap(), &-2.0);
+ assert_eq!(stats.max_opt().unwrap(), &0.0);
+ assert!(stats.max_opt().unwrap().is_sign_positive());
} else {
panic!("expecting Statistics::Double");
}
@@ -2705,15 +2697,20 @@ mod tests {
}
if let Some(stats) = r.metadata.statistics() {
- assert!(stats.has_min_max_set());
- assert_eq!(stats.null_count(), 0);
+ assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count(), None);
if let Statistics::Int32(stats) = stats {
// first page is [1,2,3,4]
// second page is [-5,2,4,8]
// note that we don't increment here, as this is a non
BinaryArray type.
- assert_eq!(stats.min_bytes(),
column_index.min_values[1].as_slice());
- assert_eq!(stats.max_bytes(),
column_index.max_values.get(1).unwrap());
+ assert_eq!(
+ stats.min_bytes_opt(),
+ Some(column_index.min_values[1].as_slice())
+ );
+ assert_eq!(
+ stats.max_bytes_opt(),
+ column_index.max_values.get(1).map(Vec::as_slice)
+ );
} else {
panic!("expecting Statistics::Int32");
}
@@ -2760,16 +2757,21 @@ mod tests {
assert_eq!(0, column_index.null_counts.as_ref().unwrap()[0]);
if let Some(stats) = r.metadata.statistics() {
- assert!(stats.has_min_max_set());
- assert_eq!(stats.null_count(), 0);
+ assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count(), None);
if let Statistics::FixedLenByteArray(stats) = stats {
let column_index_min_value = &column_index.min_values[0];
let column_index_max_value = &column_index.max_values[0];
// Column index stats are truncated, while the column chunk's
aren't.
- assert_ne!(stats.min_bytes(),
column_index_min_value.as_slice());
- assert_ne!(stats.max_bytes(),
column_index_max_value.as_slice());
+ assert_ne!(
+ stats.min_bytes_opt(),
+ Some(column_index_min_value.as_slice())
+ );
+ assert_ne!(
+ stats.max_bytes_opt(),
+ Some(column_index_max_value.as_slice())
+ );
assert_eq!(
column_index_min_value.len(),
@@ -2827,8 +2829,7 @@ mod tests {
assert_eq!(0, column_index.null_counts.as_ref().unwrap()[0]);
if let Some(stats) = r.metadata.statistics() {
- assert!(stats.has_min_max_set());
- assert_eq!(stats.null_count(), 0);
+ assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count(), None);
if let Statistics::FixedLenByteArray(_stats) = stats {
let column_index_min_value = &column_index.min_values[0];
@@ -2840,8 +2841,8 @@ mod tests {
assert_eq!("B".as_bytes(), column_index_min_value.as_slice());
assert_eq!("C".as_bytes(), column_index_max_value.as_slice());
- assert_ne!(column_index_min_value, stats.min_bytes());
- assert_ne!(column_index_max_value, stats.max_bytes());
+ assert_ne!(column_index_min_value,
stats.min_bytes_opt().unwrap());
+ assert_ne!(column_index_max_value,
stats.max_bytes_opt().unwrap());
} else {
panic!("expecting Statistics::FixedLenByteArray");
}
@@ -2875,10 +2876,9 @@ mod tests {
// ensure bytes weren't truncated for statistics
let stats = r.metadata.statistics().unwrap();
- assert!(stats.has_min_max_set());
if let Statistics::FixedLenByteArray(stats) = stats {
- let stats_min_bytes = stats.min_bytes();
- let stats_max_bytes = stats.max_bytes();
+ let stats_min_bytes = stats.min_bytes_opt().unwrap();
+ let stats_max_bytes = stats.max_bytes_opt().unwrap();
assert_eq!(expected_value, stats_min_bytes);
assert_eq!(expected_value, stats_max_bytes);
} else {
@@ -2915,10 +2915,9 @@ mod tests {
// ensure bytes weren't truncated for statistics
let stats = r.metadata.statistics().unwrap();
- assert!(stats.has_min_max_set());
if let Statistics::FixedLenByteArray(stats) = stats {
- let stats_min_bytes = stats.min_bytes();
- let stats_max_bytes = stats.max_bytes();
+ let stats_min_bytes = stats.min_bytes_opt().unwrap();
+ let stats_max_bytes = stats.max_bytes_opt().unwrap();
assert_eq!(expected_value, stats_min_bytes);
assert_eq!(expected_value, stats_max_bytes);
} else {
@@ -2951,12 +2950,11 @@ mod tests {
assert_eq!(1, r.rows_written);
let stats = r.metadata.statistics().expect("statistics");
- assert!(stats.has_min_max_set());
- assert_eq!(stats.null_count(), 0);
+ assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count(), None);
if let Statistics::ByteArray(_stats) = stats {
- let min_value = _stats.min();
- let max_value = _stats.max();
+ let min_value = _stats.min_opt().unwrap();
+ let max_value = _stats.max_opt().unwrap();
assert!(!_stats.min_is_exact());
assert!(!_stats.max_is_exact());
@@ -3004,12 +3002,11 @@ mod tests {
assert_eq!(1, r.rows_written);
let stats = r.metadata.statistics().expect("statistics");
- assert!(stats.has_min_max_set());
- assert_eq!(stats.null_count(), 0);
+ assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count(), None);
if let Statistics::FixedLenByteArray(_stats) = stats {
- let min_value = _stats.min();
- let max_value = _stats.max();
+ let min_value = _stats.min_opt().unwrap();
+ let max_value = _stats.max_opt().unwrap();
assert!(!_stats.min_is_exact());
assert!(!_stats.max_is_exact());
@@ -3326,7 +3323,8 @@ mod tests {
} else {
panic!("metadata missing statistics");
};
- assert!(!stats.has_min_max_set());
+ assert!(stats.min_bytes_opt().is_none());
+ assert!(stats.max_bytes_opt().is_none());
}
fn write_multiple_pages<T: DataType>(
diff --git a/parquet/src/file/metadata/memory.rs
b/parquet/src/file/metadata/memory.rs
index bb822b4ccb..ad45226790 100644
--- a/parquet/src/file/metadata/memory.rs
+++ b/parquet/src/file/metadata/memory.rs
@@ -183,14 +183,8 @@ impl<T: ParquetValueType> HeapSize for PageIndex<T> {
impl<T: ParquetValueType> HeapSize for ValueStatistics<T> {
fn heap_size(&self) -> usize {
- if self.has_min_max_set() {
- return self.min().heap_size() + self.max().heap_size();
- } else if self.min_is_exact() {
- return self.min().heap_size();
- } else if self.max_is_exact() {
- return self.max().heap_size();
- }
- 0
+ self.min_opt().map(T::heap_size).unwrap_or(0)
+ + self.max_opt().map(T::heap_size).unwrap_or(0)
}
}
impl HeapSize for bool {
diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs
index a18128cfd5..f3c73ee81a 100644
--- a/parquet/src/file/metadata/mod.rs
+++ b/parquet/src/file/metadata/mod.rs
@@ -1615,7 +1615,7 @@ mod tests {
.iter()
.map(|column_descr| {
ColumnChunkMetaData::builder(column_descr.clone())
- .set_statistics(Statistics::new::<i32>(None, None, None,
0, false))
+ .set_statistics(Statistics::new::<i32>(None, None, None,
None, false))
.build()
})
.collect::<Result<Vec<_>>>()
@@ -1653,7 +1653,13 @@ mod tests {
.iter()
.map(|column_descr| {
ColumnChunkMetaData::builder(column_descr.clone())
- .set_statistics(Statistics::new::<i32>(Some(0), Some(100),
None, 0, false))
+ .set_statistics(Statistics::new::<i32>(
+ Some(0),
+ Some(100),
+ None,
+ None,
+ false,
+ ))
.build()
})
.collect::<Result<Vec<_>>>()
@@ -1667,7 +1673,7 @@ mod tests {
let row_group_meta_with_stats = vec![row_group_meta_with_stats];
let parquet_meta = ParquetMetaData::new(file_metadata.clone(),
row_group_meta_with_stats);
- let base_expected_size = 2280;
+ let base_expected_size = 2312;
assert_eq!(parquet_meta.memory_size(), base_expected_size);
@@ -1695,7 +1701,7 @@ mod tests {
]]),
);
- let bigger_expected_size = 2784;
+ let bigger_expected_size = 2816;
// more set fields means more memory usage
assert!(bigger_expected_size > base_expected_size);
assert_eq!(parquet_meta.memory_size(), bigger_expected_size);
diff --git a/parquet/src/file/serialized_reader.rs
b/parquet/src/file/serialized_reader.rs
index 07e80f7f69..0a3e519318 100644
--- a/parquet/src/file/serialized_reader.rs
+++ b/parquet/src/file/serialized_reader.rs
@@ -1544,7 +1544,10 @@ mod tests {
fn get_row_group_min_max_bytes(r: &RowGroupMetaData, col_num: usize) ->
(&[u8], &[u8]) {
let statistics = r.column(col_num).statistics().unwrap();
- (statistics.min_bytes(), statistics.max_bytes())
+ (
+ statistics.min_bytes_opt().unwrap_or_default(),
+ statistics.max_bytes_opt().unwrap_or_default(),
+ )
}
#[test]
diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs
index dded9c9c11..4134685ffc 100644
--- a/parquet/src/file/statistics.rs
+++ b/parquet/src/file/statistics.rs
@@ -24,17 +24,16 @@
//! ```rust
//! use parquet::file::statistics::Statistics;
//!
-//! let stats = Statistics::int32(Some(1), Some(10), None, 3, true);
-//! assert_eq!(stats.null_count(), 3);
-//! assert!(stats.has_min_max_set());
+//! let stats = Statistics::int32(Some(1), Some(10), None, Some(3), true);
+//! assert_eq!(stats.null_count_opt(), Some(3));
//! assert!(stats.is_min_max_deprecated());
//! assert!(stats.min_is_exact());
//! assert!(stats.max_is_exact());
//!
//! match stats {
//! Statistics::Int32(ref typed) => {
-//! assert_eq!(*typed.min(), 1);
-//! assert_eq!(*typed.max(), 10);
+//! assert_eq!(typed.min_opt(), Some(&1));
+//! assert_eq!(typed.max_opt(), Some(&10));
//! }
//! _ => {}
//! }
@@ -89,7 +88,7 @@ macro_rules! statistics_new_func {
min: $vtype,
max: $vtype,
distinct: Option<u64>,
- nulls: u64,
+ nulls: Option<u64>,
is_deprecated: bool,
) -> Self {
Statistics::$stat(ValueStatistics::new(
@@ -127,6 +126,8 @@ pub fn from_thrift(
Ok(match thrift_stats {
Some(stats) => {
// Number of nulls recorded, when it is not available, we just
mark it as 0.
+ // TODO this should be `None` if there is no information about
NULLS.
+ // see https://github.com/apache/arrow-rs/pull/6216/files
let null_count = stats.null_count.unwrap_or(0);
if null_count < 0 {
@@ -137,7 +138,7 @@ pub fn from_thrift(
}
// Generic null count.
- let null_count = null_count as u64;
+ let null_count = Some(null_count as u64);
// Generic distinct count (count of distinct values occurring)
let distinct_count = stats.distinct_count.map(|value| value as
u64);
// Whether or not statistics use deprecated min/max fields.
@@ -243,14 +244,19 @@ pub fn from_thrift(
pub fn to_thrift(stats: Option<&Statistics>) -> Option<TStatistics> {
let stats = stats?;
+ // record null counts if greater than zero.
+ //
+ // TODO: This should be Some(0) if there are no nulls.
+ // see https://github.com/apache/arrow-rs/pull/6216/files
+ let null_count = stats
+ .null_count_opt()
+ .map(|value| value as i64)
+ .filter(|&x| x > 0);
+
let mut thrift_stats = TStatistics {
max: None,
min: None,
- null_count: if stats.has_nulls() {
- Some(stats.null_count() as i64)
- } else {
- None
- },
+ null_count,
distinct_count: stats.distinct_count().map(|value| value as i64),
max_value: None,
min_value: None,
@@ -259,17 +265,12 @@ pub fn to_thrift(stats: Option<&Statistics>) ->
Option<TStatistics> {
};
// Get min/max if set.
- let (min, max, min_exact, max_exact) = if stats.has_min_max_set() {
- (
- Some(stats.min_bytes().to_vec()),
- Some(stats.max_bytes().to_vec()),
- Some(stats.min_is_exact()),
- Some(stats.max_is_exact()),
- )
- } else {
- (None, None, None, None)
- };
-
+ let (min, max, min_exact, max_exact) = (
+ stats.min_bytes_opt().map(|x| x.to_vec()),
+ stats.max_bytes_opt().map(|x| x.to_vec()),
+ Some(stats.min_is_exact()),
+ Some(stats.max_is_exact()),
+ );
if stats.is_min_max_backwards_compatible() {
// Copy to deprecated min, max values for compatibility with older
readers
thrift_stats.min.clone_from(&min);
@@ -321,7 +322,7 @@ impl Statistics {
min: Option<T>,
max: Option<T>,
distinct_count: Option<u64>,
- null_count: u64,
+ null_count: Option<u64>,
is_deprecated: bool,
) -> Self {
Self::from(ValueStatistics::new(
@@ -385,19 +386,39 @@ impl Statistics {
/// Returns number of null values for the column.
/// Note that this includes all nulls when column is part of the complex
type.
+ ///
+ /// Note this API returns 0 if the null count is not available.
+ #[deprecated(since = "53.0.0", note = "Use `null_count_opt` method
instead")]
pub fn null_count(&self) -> u64 {
- statistics_enum_func![self, null_count]
+ // 0 to remain consistent behavior prior to `null_count_opt`
+ self.null_count_opt().unwrap_or(0)
}
/// Returns `true` if statistics collected any null values, `false`
otherwise.
+ #[deprecated(since = "53.0.0", note = "Use `null_count_opt` method
instead")]
+ #[allow(deprecated)]
pub fn has_nulls(&self) -> bool {
self.null_count() > 0
}
- /// Returns `true` if min value and max value are set.
+ /// Returns number of null values for the column, if known.
+ /// Note that this includes all nulls when column is part of the complex
type.
+ ///
+ /// Note this API returns Some(0) even if the null count was not present
+ /// in the statistics.
+ /// See <https://github.com/apache/arrow-rs/pull/6216/files>
+ pub fn null_count_opt(&self) -> Option<u64> {
+ statistics_enum_func![self, null_count_opt]
+ }
+
+ /// Whether or not min and max values are set.
/// Normally both min/max values will be set to `Some(value)` or `None`.
+ #[deprecated(
+ since = "53.0.0",
+ note = "Use `min_bytes_opt` and `max_bytes_opt` methods instead"
+ )]
pub fn has_min_max_set(&self) -> bool {
- statistics_enum_func![self, has_min_max_set]
+ statistics_enum_func![self, _internal_has_min_max_set]
}
/// Returns `true` if the min value is set, and is an exact min value.
@@ -410,16 +431,28 @@ impl Statistics {
statistics_enum_func![self, max_is_exact]
}
+ /// Returns slice of bytes that represent min value, if min value is known.
+ pub fn min_bytes_opt(&self) -> Option<&[u8]> {
+ statistics_enum_func![self, min_bytes_opt]
+ }
+
/// Returns slice of bytes that represent min value.
/// Panics if min value is not set.
+ #[deprecated(since = "53.0.0", note = "Use `max_bytes_opt` instead")]
pub fn min_bytes(&self) -> &[u8] {
- statistics_enum_func![self, min_bytes]
+ self.min_bytes_opt().unwrap()
+ }
+
+ /// Returns slice of bytes that represent max value, if max value is known.
+ pub fn max_bytes_opt(&self) -> Option<&[u8]> {
+ statistics_enum_func![self, max_bytes_opt]
}
/// Returns slice of bytes that represent max value.
/// Panics if max value is not set.
+ #[deprecated(since = "53.0.0", note = "Use `max_bytes_opt` instead")]
pub fn max_bytes(&self) -> &[u8] {
- statistics_enum_func![self, max_bytes]
+ self.max_bytes_opt().unwrap()
}
/// Returns physical type associated with statistics.
@@ -464,7 +497,7 @@ pub struct ValueStatistics<T> {
max: Option<T>,
// Distinct count could be omitted in some cases
distinct_count: Option<u64>,
- null_count: u64,
+ null_count: Option<u64>,
// Whether or not the min or max values are exact, or truncated.
is_max_value_exact: bool,
@@ -485,7 +518,7 @@ impl<T: ParquetValueType> ValueStatistics<T> {
min: Option<T>,
max: Option<T>,
distinct_count: Option<u64>,
- null_count: u64,
+ null_count: Option<u64>,
is_min_max_deprecated: bool,
) -> Self {
Self {
@@ -538,37 +571,68 @@ impl<T: ParquetValueType> ValueStatistics<T> {
///
/// Panics if min value is not set, e.g. all values are `null`.
/// Use `has_min_max_set` method to check that.
+ #[deprecated(since = "53.0.0", note = "Use `min_opt` instead")]
pub fn min(&self) -> &T {
self.min.as_ref().unwrap()
}
+ /// Returns min value of the statistics, if known.
+ pub fn min_opt(&self) -> Option<&T> {
+ self.min.as_ref()
+ }
+
/// Returns max value of the statistics.
///
/// Panics if max value is not set, e.g. all values are `null`.
/// Use `has_min_max_set` method to check that.
+ #[deprecated(since = "53.0.0", note = "Use `max_opt` instead")]
pub fn max(&self) -> &T {
self.max.as_ref().unwrap()
}
+ /// Returns max value of the statistics, if known.
+ pub fn max_opt(&self) -> Option<&T> {
+ self.max.as_ref()
+ }
+
+ /// Returns min value as bytes of the statistics, if min value is known.
+ pub fn min_bytes_opt(&self) -> Option<&[u8]> {
+ self.min_opt().map(AsBytes::as_bytes)
+ }
+
/// Returns min value as bytes of the statistics.
///
/// Panics if min value is not set, use `has_min_max_set` method to check
/// if values are set.
+ #[deprecated(since = "53.0.0", note = "Use `min_bytes_opt` instead")]
pub fn min_bytes(&self) -> &[u8] {
- self.min().as_bytes()
+ self.min_bytes_opt().unwrap()
+ }
+
+ /// Returns max value as bytes of the statistics, if max value is known.
+ pub fn max_bytes_opt(&self) -> Option<&[u8]> {
+ self.max_opt().map(AsBytes::as_bytes)
}
/// Returns max value as bytes of the statistics.
///
/// Panics if max value is not set, use `has_min_max_set` method to check
/// if values are set.
+ #[deprecated(since = "53.0.0", note = "Use `max_bytes_opt` instead")]
pub fn max_bytes(&self) -> &[u8] {
- self.max().as_bytes()
+ self.max_bytes_opt().unwrap()
}
/// Whether or not min and max values are set.
/// Normally both min/max values will be set to `Some(value)` or `None`.
+ #[deprecated(since = "53.0.0", note = "Use `min_opt` and `max_opt` methods
instead")]
pub fn has_min_max_set(&self) -> bool {
+ self._internal_has_min_max_set()
+ }
+
+ /// Whether or not min and max values are set.
+ /// Normally both min/max values will be set to `Some(value)` or `None`.
+ pub(crate) fn _internal_has_min_max_set(&self) -> bool {
self.min.is_some() && self.max.is_some()
}
@@ -587,8 +651,16 @@ impl<T: ParquetValueType> ValueStatistics<T> {
self.distinct_count
}
- /// Returns null count.
+ /// Returns number of null values for the column.
+ /// Note that this includes all nulls when column is part of the complex
type.
+ #[deprecated(since = "53.0.0", note = "Use `null_count_opt` method
instead")]
pub fn null_count(&self) -> u64 {
+ // 0 to remain consistent behavior prior to `null_count_opt`
+ self.null_count_opt().unwrap_or(0)
+ }
+
+ /// Returns null count.
+ pub fn null_count_opt(&self) -> Option<u64> {
self.null_count
}
@@ -630,7 +702,11 @@ impl<T: ParquetValueType> fmt::Display for
ValueStatistics<T> {
Some(value) => write!(f, "{value}")?,
None => write!(f, "N/A")?,
}
- write!(f, ", null_count: {}", self.null_count)?;
+ write!(f, ", null_count: ")?;
+ match self.null_count {
+ Some(value) => write!(f, "{value}")?,
+ None => write!(f, "N/A")?,
+ }
write!(f, ", min_max_deprecated: {}", self.is_min_max_deprecated)?;
write!(f, ", max_value_exact: {}", self.is_max_value_exact)?;
write!(f, ", min_value_exact: {}", self.is_min_value_exact)?;
@@ -642,7 +718,7 @@ impl<T: ParquetValueType> fmt::Debug for ValueStatistics<T>
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
- "{{min: {:?}, max: {:?}, distinct_count: {:?}, null_count: {}, \
+ "{{min: {:?}, max: {:?}, distinct_count: {:?}, null_count: {:?}, \
min_max_deprecated: {}, min_max_backwards_compatible: {},
max_value_exact: {}, min_value_exact: {}}}",
self.min,
self.max,
@@ -662,21 +738,19 @@ mod tests {
#[test]
fn test_statistics_min_max_bytes() {
- let stats = Statistics::int32(Some(-123), Some(234), None, 1, false);
- assert!(stats.has_min_max_set());
- assert_eq!(stats.min_bytes(), (-123).as_bytes());
- assert_eq!(stats.max_bytes(), 234.as_bytes());
+ let stats = Statistics::int32(Some(-123), Some(234), None, Some(1),
false);
+ assert_eq!(stats.min_bytes_opt(), Some((-123).as_bytes()));
+ assert_eq!(stats.max_bytes_opt(), Some(234.as_bytes()));
let stats = Statistics::byte_array(
Some(ByteArray::from(vec![1, 2, 3])),
Some(ByteArray::from(vec![3, 4, 5])),
None,
- 1,
+ Some(1),
true,
);
- assert!(stats.has_min_max_set());
- assert_eq!(stats.min_bytes(), &[1, 2, 3]);
- assert_eq!(stats.max_bytes(), &[3, 4, 5]);
+ assert_eq!(stats.min_bytes_opt().unwrap(), &[1, 2, 3]);
+ assert_eq!(stats.max_bytes_opt().unwrap(), &[3, 4, 5]);
}
#[test]
@@ -704,30 +778,30 @@ mod tests {
#[test]
fn test_statistics_debug() {
- let stats = Statistics::int32(Some(1), Some(12), None, 12, true);
+ let stats = Statistics::int32(Some(1), Some(12), None, Some(12), true);
assert_eq!(
format!("{stats:?}"),
- "Int32({min: Some(1), max: Some(12), distinct_count: None,
null_count: 12, \
+ "Int32({min: Some(1), max: Some(12), distinct_count: None,
null_count: Some(12), \
min_max_deprecated: true, min_max_backwards_compatible: true,
max_value_exact: true, min_value_exact: true})"
);
- let stats = Statistics::int32(None, None, None, 7, false);
+ let stats = Statistics::int32(None, None, None, Some(7), false);
assert_eq!(
format!("{stats:?}"),
- "Int32({min: None, max: None, distinct_count: None, null_count: 7,
\
+ "Int32({min: None, max: None, distinct_count: None, null_count:
Some(7), \
min_max_deprecated: false, min_max_backwards_compatible: false,
max_value_exact: false, min_value_exact: false})"
)
}
#[test]
fn test_statistics_display() {
- let stats = Statistics::int32(Some(1), Some(12), None, 12, true);
+ let stats = Statistics::int32(Some(1), Some(12), None, Some(12), true);
assert_eq!(
format!("{stats}"),
"{min: 1, max: 12, distinct_count: N/A, null_count: 12,
min_max_deprecated: true, max_value_exact: true, min_value_exact: true}"
);
- let stats = Statistics::int64(None, None, None, 7, false);
+ let stats = Statistics::int64(None, None, None, Some(7), false);
assert_eq!(
format!("{stats}"),
"{min: N/A, max: N/A, distinct_count: N/A, null_count: 7,
min_max_deprecated: \
@@ -738,7 +812,7 @@ mod tests {
Some(Int96::from(vec![1, 0, 0])),
Some(Int96::from(vec![2, 3, 4])),
None,
- 3,
+ Some(3),
true,
);
assert_eq!(
@@ -752,7 +826,7 @@ mod tests {
Some(ByteArray::from(vec![1u8])),
Some(ByteArray::from(vec![2u8])),
Some(5),
- 7,
+ Some(7),
false,
)
.with_max_is_exact(false)
@@ -766,22 +840,22 @@ mod tests {
#[test]
fn test_statistics_partial_eq() {
- let expected = Statistics::int32(Some(12), Some(45), None, 11, true);
+ let expected = Statistics::int32(Some(12), Some(45), None, Some(11),
true);
- assert!(Statistics::int32(Some(12), Some(45), None, 11, true) ==
expected);
- assert!(Statistics::int32(Some(11), Some(45), None, 11, true) !=
expected);
- assert!(Statistics::int32(Some(12), Some(44), None, 11, true) !=
expected);
- assert!(Statistics::int32(Some(12), Some(45), None, 23, true) !=
expected);
- assert!(Statistics::int32(Some(12), Some(45), None, 11, false) !=
expected);
+ assert!(Statistics::int32(Some(12), Some(45), None, Some(11), true) ==
expected);
+ assert!(Statistics::int32(Some(11), Some(45), None, Some(11), true) !=
expected);
+ assert!(Statistics::int32(Some(12), Some(44), None, Some(11), true) !=
expected);
+ assert!(Statistics::int32(Some(12), Some(45), None, Some(23), true) !=
expected);
+ assert!(Statistics::int32(Some(12), Some(45), None, Some(11), false)
!= expected);
assert!(
- Statistics::int32(Some(12), Some(45), None, 11, false)
- != Statistics::int64(Some(12), Some(45), None, 11, false)
+ Statistics::int32(Some(12), Some(45), None, Some(11), false)
+ != Statistics::int64(Some(12), Some(45), None, Some(11), false)
);
assert!(
- Statistics::boolean(Some(false), Some(true), None, 0, true)
- != Statistics::double(Some(1.2), Some(4.5), None, 0, true)
+ Statistics::boolean(Some(false), Some(true), None, None, true)
+ != Statistics::double(Some(1.2), Some(4.5), None, None, true)
);
assert!(
@@ -789,13 +863,13 @@ mod tests {
Some(ByteArray::from(vec![1, 2, 3])),
Some(ByteArray::from(vec![1, 2, 3])),
None,
- 0,
+ None,
true
) != Statistics::fixed_len_byte_array(
Some(ByteArray::from(vec![1, 2, 3]).into()),
Some(ByteArray::from(vec![1, 2, 3]).into()),
None,
- 0,
+ None,
true,
)
);
@@ -805,14 +879,14 @@ mod tests {
Some(ByteArray::from(vec![1, 2, 3])),
Some(ByteArray::from(vec![1, 2, 3])),
None,
- 0,
+ None,
true,
) != Statistics::ByteArray(
ValueStatistics::new(
Some(ByteArray::from(vec![1, 2, 3])),
Some(ByteArray::from(vec![1, 2, 3])),
None,
- 0,
+ None,
true,
)
.with_max_is_exact(false)
@@ -824,14 +898,14 @@ mod tests {
Some(FixedLenByteArray::from(vec![1, 2, 3])),
Some(FixedLenByteArray::from(vec![1, 2, 3])),
None,
- 0,
+ None,
true,
) != Statistics::FixedLenByteArray(
ValueStatistics::new(
Some(FixedLenByteArray::from(vec![1, 2, 3])),
Some(FixedLenByteArray::from(vec![1, 2, 3])),
None,
- 0,
+ None,
true,
)
.with_min_is_exact(false)
@@ -848,45 +922,123 @@ mod tests {
assert_eq!(from_thrift(tpe, thrift_stats).unwrap(), Some(stats));
}
- check_stats(Statistics::boolean(Some(false), Some(true), None, 7,
true));
- check_stats(Statistics::boolean(Some(false), Some(true), None, 7,
true));
- check_stats(Statistics::boolean(Some(false), Some(true), None, 0,
false));
- check_stats(Statistics::boolean(Some(true), Some(true), None, 7,
true));
- check_stats(Statistics::boolean(Some(false), Some(false), None, 7,
true));
- check_stats(Statistics::boolean(None, None, None, 7, true));
+ check_stats(Statistics::boolean(
+ Some(false),
+ Some(true),
+ None,
+ Some(7),
+ true,
+ ));
+ check_stats(Statistics::boolean(
+ Some(false),
+ Some(true),
+ None,
+ Some(7),
+ true,
+ ));
+ check_stats(Statistics::boolean(
+ Some(false),
+ Some(true),
+ None,
+ Some(0),
+ false,
+ ));
+ check_stats(Statistics::boolean(
+ Some(true),
+ Some(true),
+ None,
+ Some(7),
+ true,
+ ));
+ check_stats(Statistics::boolean(
+ Some(false),
+ Some(false),
+ None,
+ Some(7),
+ true,
+ ));
+ check_stats(Statistics::boolean(None, None, None, Some(7), true));
- check_stats(Statistics::int32(Some(-100), Some(500), None, 7, true));
- check_stats(Statistics::int32(Some(-100), Some(500), None, 0, false));
- check_stats(Statistics::int32(None, None, None, 7, true));
+ check_stats(Statistics::int32(
+ Some(-100),
+ Some(500),
+ None,
+ Some(7),
+ true,
+ ));
+ check_stats(Statistics::int32(
+ Some(-100),
+ Some(500),
+ None,
+ Some(0),
+ false,
+ ));
+ check_stats(Statistics::int32(None, None, None, Some(7), true));
- check_stats(Statistics::int64(Some(-100), Some(200), None, 7, true));
- check_stats(Statistics::int64(Some(-100), Some(200), None, 0, false));
- check_stats(Statistics::int64(None, None, None, 7, true));
+ check_stats(Statistics::int64(
+ Some(-100),
+ Some(200),
+ None,
+ Some(7),
+ true,
+ ));
+ check_stats(Statistics::int64(
+ Some(-100),
+ Some(200),
+ None,
+ Some(0),
+ false,
+ ));
+ check_stats(Statistics::int64(None, None, None, Some(7), true));
- check_stats(Statistics::float(Some(1.2), Some(3.4), None, 7, true));
- check_stats(Statistics::float(Some(1.2), Some(3.4), None, 0, false));
- check_stats(Statistics::float(None, None, None, 7, true));
+ check_stats(Statistics::float(Some(1.2), Some(3.4), None, Some(7),
true));
+ check_stats(Statistics::float(
+ Some(1.2),
+ Some(3.4),
+ None,
+ Some(0),
+ false,
+ ));
+ check_stats(Statistics::float(None, None, None, Some(7), true));
- check_stats(Statistics::double(Some(1.2), Some(3.4), None, 7, true));
- check_stats(Statistics::double(Some(1.2), Some(3.4), None, 0, false));
- check_stats(Statistics::double(None, None, None, 7, true));
+ check_stats(Statistics::double(
+ Some(1.2),
+ Some(3.4),
+ None,
+ Some(7),
+ true,
+ ));
+ check_stats(Statistics::double(
+ Some(1.2),
+ Some(3.4),
+ None,
+ Some(0),
+ false,
+ ));
+ check_stats(Statistics::double(None, None, None, Some(7), true));
check_stats(Statistics::byte_array(
Some(ByteArray::from(vec![1, 2, 3])),
Some(ByteArray::from(vec![3, 4, 5])),
None,
- 7,
+ Some(7),
true,
));
- check_stats(Statistics::byte_array(None, None, None, 7, true));
+ check_stats(Statistics::byte_array(None, None, None, Some(7), true));
check_stats(Statistics::fixed_len_byte_array(
Some(ByteArray::from(vec![1, 2, 3]).into()),
Some(ByteArray::from(vec![3, 4, 5]).into()),
None,
- 7,
+ Some(7),
+ true,
+ ));
+ check_stats(Statistics::fixed_len_byte_array(
+ None,
+ None,
+ None,
+ Some(7),
true,
));
- check_stats(Statistics::fixed_len_byte_array(None, None, None, 7,
true));
}
}
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index 23ac3556bd..a23a2a3327 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -1122,7 +1122,7 @@ mod tests {
encoding: Encoding::DELTA_BINARY_PACKED,
def_level_encoding: Encoding::RLE,
rep_level_encoding: Encoding::RLE,
- statistics: Some(Statistics::int32(Some(1), Some(3), None, 7,
true)),
+ statistics: Some(Statistics::int32(Some(1), Some(3), None,
Some(7), true)),
},
Page::DataPageV2 {
buf: Bytes::from(vec![4; 128]),
@@ -1133,7 +1133,7 @@ mod tests {
def_levels_byte_len: 24,
rep_levels_byte_len: 32,
is_compressed: false,
- statistics: Some(Statistics::int32(Some(1), Some(3), None, 7,
true)),
+ statistics: Some(Statistics::int32(Some(1), Some(3), None,
Some(7), true)),
},
];
@@ -1156,7 +1156,7 @@ mod tests {
encoding: Encoding::DELTA_BINARY_PACKED,
def_level_encoding: Encoding::RLE,
rep_level_encoding: Encoding::RLE,
- statistics: Some(Statistics::int32(Some(1), Some(3), None, 7,
true)),
+ statistics: Some(Statistics::int32(Some(1), Some(3), None,
Some(7), true)),
},
Page::DataPageV2 {
buf: Bytes::from(vec![4; 128]),