RoseZhang123 commented on code in PR #8762:
URL: https://github.com/apache/arrow-rs/pull/8762#discussion_r2550843912
##########
parquet/src/bloom_filter/mod.rs:
##########
@@ -244,29 +244,8 @@ fn num_of_bits_from_ndv_fpp(ndv: u64, fpp: f64) -> usize {
}
impl Sbbf {
- /// Create a new [Sbbf] with given number of distinct values and false
positive probability.
- /// Will return an error if `fpp` is greater than or equal to 1.0 or less
than 0.0.
- pub(crate) fn new_with_ndv_fpp(ndv: u64, fpp: f64) -> Result<Self,
ParquetError> {
- if !(0.0..1.0).contains(&fpp) {
- return Err(ParquetError::General(format!(
- "False positive probability must be between 0.0 and 1.0, got
{fpp}"
- )));
- }
- let num_bits = num_of_bits_from_ndv_fpp(ndv, fpp);
- Ok(Self::new_with_num_of_bytes(num_bits / 8))
- }
-
- /// Create a new [Sbbf] with given number of bytes, the exact number of
bytes will be adjusted
- /// to the next power of two bounded by [BITSET_MIN_LENGTH] and
[BITSET_MAX_LENGTH].
- pub(crate) fn new_with_num_of_bytes(num_bytes: usize) -> Self {
- let num_bytes = optimal_num_of_bytes(num_bytes);
- assert_eq!(num_bytes % size_of::<Block>(), 0);
- let num_blocks = num_bytes / size_of::<Block>();
- let bitset = vec![Block::ZERO; num_blocks];
- Self(bitset)
- }
-
- pub(crate) fn new(bitset: &[u8]) -> Self {
+ /// Create a new [Sbbf] from raw bitset bytes.
Review Comment:
Sounds good. I decided to add one more function to make things easier. Now
`bitset_len()` returns the size of the bloom filter’s bitset in bytes, and
`as_slice()` returns the bitset itself.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]