This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 9d7dde2722 remove T: ParquetValueType bound on ValueStatistics (#8824)
9d7dde2722 is described below
commit 9d7dde2722a8392996a1f5a978e989b2fea4ed0e
Author: Patrick Marks <[email protected]>
AuthorDate: Fri Nov 14 18:12:06 2025 +0100
remove T: ParquetValueType bound on ValueStatistics (#8824)
The old bound was `T: ParquetValueType`, which isn't public, so it was
impossible to call the methods on ValueStatistics from a generic
function in another crate, forcing the use of macros. Split out the few
functions that require `T: AsBytes` into a separate impl.
# Which issue does this PR close?
Closes #8823
# Rationale for this change
See issue for more details
# Are these changes tested?
Added a test that calls the `ValueStatistics::min_opt` method from a
generic function without a bound on a private trait.
# Are there any user-facing changes?
The methods were already public and documented, they just couldn't
actually be called from external crates.
---------
Co-authored-by: Ed Seidl <[email protected]>
---
parquet/src/file/statistics.rs | 46 ++++++++++++++++++++++++++++++++----------
1 file changed, 35 insertions(+), 11 deletions(-)
diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs
index 889e5ea66b..a813e82d13 100644
--- a/parquet/src/file/statistics.rs
+++ b/parquet/src/file/statistics.rs
@@ -515,7 +515,7 @@ pub struct ValueStatistics<T> {
is_min_max_backwards_compatible: bool,
}
-impl<T: ParquetValueType> ValueStatistics<T> {
+impl<T> ValueStatistics<T> {
/// Creates new typed statistics.
pub fn new(
min: Option<T>,
@@ -580,16 +580,6 @@ impl<T: ParquetValueType> ValueStatistics<T> {
self.max.as_ref()
}
- /// Returns min value as bytes of the statistics, if min value is known.
- pub fn min_bytes_opt(&self) -> Option<&[u8]> {
- self.min_opt().map(AsBytes::as_bytes)
- }
-
- /// Returns max value as bytes of the statistics, if max value is known.
- pub fn max_bytes_opt(&self) -> Option<&[u8]> {
- self.max_opt().map(AsBytes::as_bytes)
- }
-
/// Whether or not min and max values are set.
/// Normally both min/max values will be set to `Some(value)` or `None`.
pub(crate) fn _internal_has_min_max_set(&self) -> bool {
@@ -636,6 +626,18 @@ impl<T: ParquetValueType> ValueStatistics<T> {
}
}
+impl<T: AsBytes> ValueStatistics<T> {
+ /// Returns min value as bytes of the statistics, if min value is known.
+ pub fn min_bytes_opt(&self) -> Option<&[u8]> {
+ self.min_opt().map(AsBytes::as_bytes)
+ }
+
+ /// Returns max value as bytes of the statistics, if max value is known.
+ pub fn max_bytes_opt(&self) -> Option<&[u8]> {
+ self.max_opt().map(AsBytes::as_bytes)
+ }
+}
+
impl<T: ParquetValueType> fmt::Display for ValueStatistics<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{{")?;
@@ -1121,4 +1123,26 @@ mod tests {
"Parquet error: Incorrect Int96 max statistics"
);
}
+
+ // Ensures that we can call ValueStatistics::min_opt from a
+    // generic function without relying on a bound to a private trait.
+ fn generic_statistics_handler<T: std::fmt::Display>(stats:
ValueStatistics<T>) -> String {
+ match stats.min_opt() {
+ Some(s) => format!("min: {}", s),
+ None => "min: NA".to_string(),
+ }
+ }
+
+ #[test]
+ fn test_generic_access() {
+ let stats = Statistics::int32(Some(12), Some(45), None, Some(11),
false);
+
+ match stats {
+ Statistics::Int32(v) => {
+ let stats_string = generic_statistics_handler(v);
+ assert_eq!(&stats_string, "min: 12");
+ }
+ _ => unreachable!(),
+ }
+ }
}