This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 12f57af5955 Minor: Clarify docs on `EnabledStatistics` (#5812)
12f57af5955 is described below
commit 12f57af595524d532d72a77bb4f096c204d4f2a3
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed May 29 06:38:15 2024 -0400
Minor: Clarify docs on `EnabledStatistics` (#5812)
* Minor: Clarify docs on `EnabledStatistics`
* refine
* Update parquet/src/file/properties.rs
Co-authored-by: Raphael Taylor-Davies <[email protected]>
---------
Co-authored-by: Raphael Taylor-Davies <[email protected]>
---
parquet/src/file/properties.rs | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs
index 287e73c9906..59e29440ae0 100644
--- a/parquet/src/file/properties.rs
+++ b/parquet/src/file/properties.rs
@@ -669,14 +669,30 @@ impl WriterPropertiesBuilder {
}
}
-/// Controls the level of statistics to be computed by the writer
+/// Controls the level of statistics to be computed by the writer and stored in
+/// the parquet file.
+///
+/// Enabling statistics makes the resulting Parquet file larger and requires
+/// more time to read the parquet footer.
+///
+/// Statistics can be used to improve query performance by pruning row groups
+/// and pages during query execution if the query engine supports evaluating the
+/// predicate using the statistics.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum EnabledStatistics {
- /// Compute no statistics
+ /// Compute no statistics.
None,
- /// Compute chunk-level statistics but not page-level
+ /// Compute column chunk-level statistics but not page-level.
+ ///
+ /// Setting this option will store one set of statistics for each relevant
+ /// column for each row group. The more row groups written, the more
+ /// statistics will be stored.
Chunk,
- /// Compute page-level and chunk-level statistics
+ /// Compute page-level and column chunk-level statistics.
+ ///
+ /// Setting this option will store one set of statistics for each relevant
+ /// column for each page and row group. The more row groups and the more
+ /// pages written, the more statistics will be stored.
Page,
}