AdamGS commented on PR #595:
URL:
https://github.com/apache/arrow-rs-object-store/pull/595#issuecomment-3720334814
I've put a quick benchmark together:
```rust
use criterion::{BenchmarkId, Criterion, Throughput, criterion_group,
criterion_main};
use object_store::local::LocalFileSystem;
use object_store::path::Path;
use object_store::{ObjectStore, ObjectStoreExt};
use rand::Rng;
use std::ops::Range;
use tempfile::TempDir;
/// Size in bytes of the file written for the benchmark (64 MiB).
const FILE_SIZE: u64 = 64 * 1024 * 1024; // 64 MB
/// Length in bytes of every range requested from the store (8 KiB).
const RANGE_SIZE: u64 = 8 * 1024; // 8 KB ranges
/// Builds `count` byte ranges of exactly `range_size` bytes each, with start
/// offsets drawn uniformly from `0..file_size - range_size`, so every range
/// ends before `file_size`.
///
/// The generator comes from `rand::rng()` and is not seeded here, so the
/// ranges are presumably different on every run.
///
/// # Panics
///
/// Panics if `range_size >= file_size`. Without this check the subtraction
/// would wrap silently in release builds (the profile benchmarks run under),
/// producing ranges far beyond the end of the file, and an equal size would
/// hand `random_range` an empty range.
fn generate_random_ranges(file_size: u64, range_size: u64, count: usize) -> Vec<Range<u64>> {
    assert!(
        range_size < file_size,
        "range_size ({range_size}) must be smaller than file_size ({file_size})"
    );

    // Exclusive upper bound for start offsets; loop-invariant, so computed once.
    let max_start = file_size - range_size;
    let mut rng = rand::rng();

    (0..count)
        .map(|_| {
            let start = rng.random_range(0..max_start);
            start..start + range_size
        })
        .collect()
}
/// Measures `get_ranges` against a `LocalFileSystem` store for 10, 100 and
/// 1000 randomly placed `RANGE_SIZE`-byte ranges within a `FILE_SIZE`-byte file.
///
/// Throughput is reported as the total number of requested bytes per iteration.
fn bench_read_ranges(c: &mut Criterion) {
    // A current-thread Tokio runtime drives both the setup and the measured futures.
    let runtime = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
        .unwrap();

    // The store is rooted in a temporary directory created for this run.
    let scratch_dir = TempDir::new().unwrap();
    let store = LocalFileSystem::new_with_prefix(scratch_dir.path()).unwrap();
    let location = Path::from("bench_file");

    // The payload is a repeating 0..=255 byte pattern (deterministic, not random).
    let payload: Vec<u8> = (0..FILE_SIZE).map(|offset| (offset % 256) as u8).collect();
    runtime.block_on(async {
        store.put(&location, payload.into()).await.unwrap();
    });

    let mut group = c.benchmark_group("read_ranges");
    for range_count in [10, 100, 1000] {
        let ranges = generate_random_ranges(FILE_SIZE, RANGE_SIZE, range_count);
        group.throughput(Throughput::Bytes(range_count as u64 * RANGE_SIZE));

        let id = BenchmarkId::new("local_fs", range_count);
        group.bench_with_input(id, &ranges, |bencher, ranges| {
            bencher
                .to_async(&runtime)
                .iter(|| async { store.get_ranges(&location, ranges).await.unwrap() });
        });
    }
    group.finish();
}
// Register `bench_read_ranges` in the Criterion group `benches`, then hand that
// group to `criterion_main!`, which supplies the benchmark binary's entry point.
criterion_group!(benches, bench_read_ranges);
criterion_main!(benches);
```
The improvement on my local MacBook looks like this:
```
read_ranges/local_fs/10 time: [28.918 µs 29.101 µs 29.320 µs]
thrpt: [2.6021 GiB/s 2.6217 GiB/s 2.6383 GiB/s]
change:
time: [−13.736% −12.996% −12.275%] (p = 0.00 < 0.05)
thrpt: [+13.993% +14.937% +15.923%]
Performance has improved.
Found 5 outliers among 100 measurements (5.00%)
1 (1.00%) low mild
3 (3.00%) high mild
1 (1.00%) high severe
read_ranges/local_fs/100
time: [113.00 µs 114.28 µs 115.71 µs]
thrpt: [6.5936 GiB/s 6.6763 GiB/s 6.7517 GiB/s]
change:
time: [−21.183% −20.507% −19.814%] (p = 0.00 < 0.05)
thrpt: [+24.709% +25.798% +26.876%]
Performance has improved.
Found 11 outliers among 100 measurements (11.00%)
1 (1.00%) low mild
3 (3.00%) high mild
7 (7.00%) high severe
Benchmarking read_ranges/local_fs/1000: Warming up for 3.0000 s
Warning: Unable to complete 100 samples in 5.0s. You may wish to increase
target time to 7.6s, enable flat sampling, or reduce sample count to 50.
read_ranges/local_fs/1000
time: [1.1890 ms 1.2145 ms 1.2437 ms]
thrpt: [6.1346 GiB/s 6.2819 GiB/s 6.4165 GiB/s]
change:
time: [−19.749% −17.230% −14.523%] (p = 0.00 < 0.05)
thrpt: [+16.991% +20.817% +24.609%]
Performance has improved.
Found 5 outliers among 100 measurements (5.00%)
5 (5.00%) high mild
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]