This is an automated email from the ASF dual-hosted git repository.
etseidl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 511ad068ae bench(parquet): add Sbbf check/insert benchmarks (#10041)
511ad068ae is described below
commit 511ad068ae2b7f511e16215d9d3c96fb0f334f2c
Author: Dan Mattheiss <[email protected]>
AuthorDate: Sat May 30 23:18:11 2026 -0400
bench(parquet): add Sbbf check/insert benchmarks (#10041)
Adds `bench_check` and `bench_insert` benchmarks
for`Sbbf::{check,insert}`. Originally benchmarks were part of #10011 but
were split out to follow Contributing guidelines
# Are these changes tested?
Benchmarks compiled and ran using `cargo bench -p parquet --bench
bloom_filter`.
# Are there any user-facing changes?
No.
---
parquet/benches/bloom_filter.rs | 123 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 120 insertions(+), 3 deletions(-)
diff --git a/parquet/benches/bloom_filter.rs b/parquet/benches/bloom_filter.rs
index ca4f900067..cb2bfb6b4a 100644
--- a/parquet/benches/bloom_filter.rs
+++ b/parquet/benches/bloom_filter.rs
@@ -15,8 +15,12 @@
// specific language governing permissions and limitations
// under the License.
-use criterion::{BenchmarkId, Criterion, Throughput, criterion_group,
criterion_main};
+use std::hint;
+
+use criterion::{BatchSize, BenchmarkId, Criterion, Throughput,
criterion_group, criterion_main};
use parquet::bloom_filter::Sbbf;
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
/// Build a bloom filter sized for `initial_ndv` at `fpp`, insert `num_values`
distinct values,
/// and return it ready for folding.
@@ -46,7 +50,7 @@ fn bench_fold_to_target_fpp(c: &mut Criterion) {
f.fold_to_target_fpp(fpp);
f
},
- criterion::BatchSize::SmallInput,
+ BatchSize::SmallInput,
);
});
}
@@ -104,10 +108,123 @@ fn bench_insert_only(c: &mut Criterion) {
group.finish();
}
+/// Benchmark `Sbbf::insert` across the same three cache regimes as
+/// `bench_check`. The filter is allocated once per regime and reused
+/// across iterations — bloom inserts are idempotent on identical
+/// input, so this measures the pure insert kernel cost (hash + mask +
+/// load + OR + store) without per-iteration allocation noise.
+fn bench_insert(c: &mut Criterion) {
+ let regimes: [(&str, usize); 3] = [
+ ("s_128KiB", 128 * 1024),
+ ("m_2MiB", 2 * 1024 * 1024),
+ ("l_32MiB", 32 * 1024 * 1024),
+ ];
+ const NUM_INSERTS: usize = 50_000;
+
+ let mut group = c.benchmark_group("insert");
+
+ for (label, num_bytes) in regimes {
+ let mut rng = StdRng::seed_from_u64(0xC0FFEE);
+ let keys: Vec<[u8; 16]> = (0..NUM_INSERTS)
+ .map(|_| {
+ let mut k = [0u8; 16];
+ rng.fill(&mut k);
+ k
+ })
+ .collect();
+
+ let mut filter = Sbbf::new_with_num_of_bytes(num_bytes);
+
+ group.throughput(Throughput::Elements(NUM_INSERTS as u64));
+ group.bench_with_input(BenchmarkId::new("ins", label), &keys, |b,
keys| {
+ b.iter(|| {
+ for k in keys {
+ filter.insert(hint::black_box(k.as_slice()));
+ }
+ });
+ });
+ }
+ group.finish();
+}
+
+/// Benchmark `Sbbf::check` across three cache regimes and both miss-heavy
+/// (the common case for Parquet row-group skipping) and hit-heavy workloads.
+///
+/// The three filter sizes span the cache hierarchy on a typical server:
+/// 128 KB fits in L2, 2 MB lives in L3, 32 MB spills out of L3.
+fn bench_check(c: &mut Criterion) {
+ let regimes: [(&str, usize, usize); 3] = [
+ ("s_128KiB", 128 * 1024, 10_000),
+ ("m_2MiB", 2 * 1024 * 1024, 200_000),
+ ("l_32MiB", 32 * 1024 * 1024, 3_000_000),
+ ];
+ const NUM_QUERIES: usize = 50_000;
+
+ let mut group = c.benchmark_group("check");
+
+ for (label, num_bytes, ndv) in regimes {
+ let mut rng = StdRng::seed_from_u64(0xC0FFEE);
+
+ // Clear the high bit of byte 0 on inserted keys so the miss set below
+ // — which forces the same bit to 1 — is genuinely disjoint by
+ // construction (not just disjoint at birthday-paradox probability).
+ let mut filter = Sbbf::new_with_num_of_bytes(num_bytes);
+ let mut inserted: Vec<[u8; 16]> = Vec::with_capacity(ndv);
+ for _ in 0..ndv {
+ let mut k = [0u8; 16];
+ rng.fill(&mut k);
+ k[0] &= 0x7f;
+ filter.insert(k.as_slice());
+ inserted.push(k);
+ }
+
+ // Disjoint miss set: high bit of byte 0 is 1 here and 0 in `inserted`,
+ // so no miss key can equal an inserted key.
+ let miss_keys: Vec<[u8; 16]> = (0..NUM_QUERIES)
+ .map(|_| {
+ let mut k = [0u8; 16];
+ rng.fill(&mut k);
+ k[0] |= 0x80;
+ k
+ })
+ .collect();
+ let hit_keys: Vec<[u8; 16]> = (0..NUM_QUERIES)
+ .map(|_| inserted[rng.random_range(0..inserted.len())])
+ .collect();
+
+ group.throughput(Throughput::Elements(NUM_QUERIES as u64));
+ group.bench_with_input(BenchmarkId::new("miss", label), &miss_keys,
|b, keys| {
+ b.iter(|| {
+ let mut found = 0u64;
+ for k in keys {
+ if hint::black_box(filter.check(k.as_slice())) {
+ found += 1;
+ }
+ }
+ hint::black_box(found)
+ });
+ });
+ group.bench_with_input(BenchmarkId::new("hit", label), &hit_keys, |b,
keys| {
+ b.iter(|| {
+ let mut found = 0u64;
+ for k in keys {
+ if hint::black_box(filter.check(k.as_slice())) {
+ found += 1;
+ }
+ }
+ hint::black_box(found)
+ });
+ });
+ }
+ group.finish();
+}
+
criterion_group!(
benches,
bench_fold_to_target_fpp,
bench_insert_and_fold,
- bench_insert_only
+ bench_insert_only,
+ bench_insert,
+ bench_check,
);
criterion_main!(benches);