alamb commented on code in PR #8762:
URL: https://github.com/apache/arrow-rs/pull/8762#discussion_r2482705647
##########
parquet/src/bloom_filter/mod.rs:
##########
@@ -244,29 +244,8 @@ fn num_of_bits_from_ndv_fpp(ndv: u64, fpp: f64) -> usize {
}
impl Sbbf {
- /// Create a new [Sbbf] with given number of distinct values and false
positive probability.
- /// Will return an error if `fpp` is greater than or equal to 1.0 or less
than 0.0.
- pub(crate) fn new_with_ndv_fpp(ndv: u64, fpp: f64) -> Result<Self,
ParquetError> {
- if !(0.0..1.0).contains(&fpp) {
- return Err(ParquetError::General(format!(
- "False positive probability must be between 0.0 and 1.0, got
{fpp}"
- )));
- }
- let num_bits = num_of_bits_from_ndv_fpp(ndv, fpp);
- Ok(Self::new_with_num_of_bytes(num_bits / 8))
- }
-
- /// Create a new [Sbbf] with given number of bytes, the exact number of
bytes will be adjusted
- /// to the next power of two bounded by [BITSET_MIN_LENGTH] and
[BITSET_MAX_LENGTH].
- pub(crate) fn new_with_num_of_bytes(num_bytes: usize) -> Self {
- let num_bytes = optimal_num_of_bytes(num_bytes);
- assert_eq!(num_bytes % size_of::<Block>(), 0);
- let num_blocks = num_bytes / size_of::<Block>();
- let bitset = vec![Block::ZERO; num_blocks];
- Self(bitset)
- }
-
- pub(crate) fn new(bitset: &[u8]) -> Self {
+ /// Create a new [Sbbf] from raw bitset bytes.
Review Comment:
Can you please add a doc example here showing how to use this API with your
intended use case?
That will both help the documentation and ensure we have exposed enough of
the API to be useful.
For example, if the idea is to save an Sbbf using `write` and then re-create
it again, an example that:
1. Makes an Sbbf (ideally reads it from a file)
2. Writes it to a `Vec<>`
3. Creates a new Sbbf from that vec
4. Shows that it is the same as the original
It would be nice to mention that a correct argument for creating an Sbbf can
be created using `Sbbf::write`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]