This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 28d7537c26 refactor: make `BloomFilterProperties` fpp/ndv private with
accessors (#9969)
28d7537c26 is described below
commit 28d7537c26de0ad3320cc44bc47521d1c66f02dd
Author: Yu-Chuan Hung <[email protected]>
AuthorDate: Fri May 15 02:19:02 2026 +0800
refactor: make `BloomFilterProperties` fpp/ndv private with accessors
(#9969)
# Which issue does this PR close?
- Follow-up to #9877; completes the remaining scope of #9667 (making
`BloomFilterProperties` fields non-public and exposing accessors).
# Rationale for this change
#9877 introduced `BloomFilterPropertiesBuilder`. With the builder in
place, the public `fpp` / `ndv` fields constrain future evolution.
Locking them down behind accessors makes the public API smaller and more
evolvable, matching the original direction outlined in #9667.
# What changes are included in this PR?
- Make `BloomFilterProperties::fpp` and `BloomFilterProperties::ndv`
private fields
- Add `pub fn fpp(&self) -> f64` and `pub fn ndv(&self) -> u64` accessor
methods
- Move the per-field documentation verbatim onto the accessor methods so
it remains rendered by rustdoc (private fields are not rendered)
# Are these changes tested?
Yes — the existing test suite covers the affected paths
# Are there any user-facing changes?
**Yes — this is a breaking change to the public API.**
- External struct-literal construction no longer compiles:
```rust
// Before — no longer works
BloomFilterProperties { fpp: 0.01, ndv: 10_000 }
// Use the builder added in #9877 instead:
BloomFilterProperties::builder()
.with_fpp(0.01)
.with_max_ndv(10_000)
.build()
```
- Direct field reads (props.fpp, props.ndv) no longer compile; use
props.fpp() / props.ndv() instead.
---
parquet/src/column/writer/encoder.rs | 4 +--
parquet/src/file/properties.rs | 48 +++++++++++++++++++++---------------
2 files changed, 30 insertions(+), 22 deletions(-)
diff --git a/parquet/src/column/writer/encoder.rs
b/parquet/src/column/writer/encoder.rs
index ec1afca583..2ea3376ae7 100644
--- a/parquet/src/column/writer/encoder.rs
+++ b/parquet/src/column/writer/encoder.rs
@@ -393,8 +393,8 @@ pub(crate) fn create_bloom_filter(
) -> Result<(Option<Sbbf>, f64)> {
match props.bloom_filter_properties(descr.path()) {
Some(bf_props) => Ok((
- Some(Sbbf::new_with_ndv_fpp(bf_props.ndv, bf_props.fpp)?),
- bf_props.fpp,
+ Some(Sbbf::new_with_ndv_fpp(bf_props.ndv(), bf_props.fpp())?),
+ bf_props.fpp(),
)),
None => Ok((None, 0.0)),
}
diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs
index 816973f83c..19191f6018 100644
--- a/parquet/src/file/properties.rs
+++ b/parquet/src/file/properties.rs
@@ -52,9 +52,9 @@ pub const DEFAULT_BLOOM_FILTER_POSITION: BloomFilterPosition
= BloomFilterPositi
pub const DEFAULT_CREATED_BY: &str = concat!("parquet-rs version ",
env!("CARGO_PKG_VERSION"));
/// Default value for [`WriterProperties::column_index_truncate_length`]
pub const DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH: Option<usize> = Some(64);
-/// Default value for [`BloomFilterProperties::fpp`]
+/// Default value for [`BloomFilterProperties::fpp()`]
pub const DEFAULT_BLOOM_FILTER_FPP: f64 = 0.05;
-/// Default value for [`BloomFilterProperties::ndv`].
+/// Default value for [`BloomFilterProperties::ndv()`].
///
/// Note: this is only the fallback default used when constructing
[`BloomFilterProperties`]
/// directly. When using [`WriterPropertiesBuilder`], columns with bloom
filters enabled
@@ -1437,6 +1437,26 @@ impl Default for EnabledStatistics {
/// [`Sbbf::fold_to_target_fpp`]: crate::bloom_filter::Sbbf::fold_to_target_fpp
#[derive(Debug, Clone, PartialEq)]
pub struct BloomFilterProperties {
+ fpp: f64,
+ ndv: u64,
+}
+
+impl Default for BloomFilterProperties {
+ fn default() -> Self {
+ BloomFilterProperties {
+ fpp: DEFAULT_BLOOM_FILTER_FPP,
+ ndv: DEFAULT_BLOOM_FILTER_NDV,
+ }
+ }
+}
+
+impl BloomFilterProperties {
+ /// Returns a new [`BloomFilterPropertiesBuilder`] for constructing
+ /// [`BloomFilterProperties`] with custom values.
+ pub fn builder() -> BloomFilterPropertiesBuilder {
+ BloomFilterPropertiesBuilder::new()
+ }
+
/// False positive probability. This should be always between 0 and 1
exclusive. Defaults to [`DEFAULT_BLOOM_FILTER_FPP`].
///
/// You should set this value by calling
[`WriterPropertiesBuilder::set_bloom_filter_fpp`].
@@ -1447,7 +1467,10 @@ pub struct BloomFilterProperties {
///
/// This value also serves as the target FPP for bloom filter folding:
after all values
/// are inserted, the filter is folded down to the smallest size that
still meets this FPP.
- pub fpp: f64,
+ pub fn fpp(&self) -> f64 {
+ self.fpp
+ }
+
/// Maximum expected number of distinct values. Defaults to
[`DEFAULT_BLOOM_FILTER_NDV`].
///
/// You should set this value by calling
[`WriterPropertiesBuilder::set_bloom_filter_max_ndv`].
@@ -1469,23 +1492,8 @@ pub struct BloomFilterProperties {
/// If you do set this value explicitly it is probably best to set it for
each column
/// individually via
[`WriterPropertiesBuilder::set_column_bloom_filter_max_ndv`] rather than
globally,
/// since different columns may have different numbers of distinct values.
- pub ndv: u64,
-}
-
-impl Default for BloomFilterProperties {
- fn default() -> Self {
- BloomFilterProperties {
- fpp: DEFAULT_BLOOM_FILTER_FPP,
- ndv: DEFAULT_BLOOM_FILTER_NDV,
- }
- }
-}
-
-impl BloomFilterProperties {
- /// Returns a new [`BloomFilterPropertiesBuilder`] for constructing
- /// [`BloomFilterProperties`] with custom values.
- pub fn builder() -> BloomFilterPropertiesBuilder {
- BloomFilterPropertiesBuilder::new()
+ pub fn ndv(&self) -> u64 {
+ self.ndv
}
}