Re: [PR] Only read file metadata once in `LocalFileSystem::read_ranges` [arrow-rs-object-store]

via GitHub Wed, 07 Jan 2026 11:04:20 -0800


AdamGS commented on PR #595:
URL: 
https://github.com/apache/arrow-rs-object-store/pull/595#issuecomment-3720334814


   I've put a quick benchmark together:
   ```rust
   use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, 
criterion_main};
   use object_store::local::LocalFileSystem;
   use object_store::path::Path;
   use object_store::{ObjectStore, ObjectStoreExt};
   use rand::Rng;
   use std::ops::Range;
   use tempfile::TempDir;
   
   /// Size in bytes of the file written for the benchmark (64 MiB).
   const FILE_SIZE: u64 = 64 * 1024 * 1024;
   /// Length in bytes of every range requested from that file (8 KiB).
   const RANGE_SIZE: u64 = 8 * 1024;
   
   /// Builds `count` random byte ranges, each exactly `range_size` bytes long,
   /// that lie entirely inside a file of `file_size` bytes.
   ///
   /// Start offsets are drawn from `0..=file_size - range_size`, so a range may
   /// end exactly at the end of the file. Ranges are generated independently
   /// and may overlap or repeat.
   ///
   /// # Panics
   ///
   /// Panics if `range_size` is larger than `file_size`.
   fn generate_random_ranges(file_size: u64, range_size: u64, count: usize) -> Vec<Range<u64>> {
       // Checked subtraction: a plain `file_size - range_size` would wrap in
       // release builds and produce ranges far beyond the end of the file.
       let max_start = file_size
           .checked_sub(range_size)
           .expect("range_size must not exceed file_size");
       let mut rng = rand::rng();
       (0..count)
           .map(|_| {
               // Inclusive bound so the final `range_size` bytes of the file can
               // be selected, and so `file_size == range_size` is a valid input.
               let start = rng.random_range(0..=max_start);
               start..start + range_size
           })
           .collect()
   }
   
   /// Criterion benchmark for `get_ranges` on a `LocalFileSystem` store.
   ///
   /// Writes one `FILE_SIZE`-byte file into a temporary directory, then measures
   /// fetching 10, 100 and 1000 random `RANGE_SIZE`-byte ranges from it,
   /// reporting throughput in bytes for each case.
   fn bench_read_ranges(c: &mut Criterion) {
       // A current-thread Tokio runtime drives both the setup write and every
       // benchmarked read.
       let runtime = tokio::runtime::Builder::new_current_thread()
           .enable_all()
           .build()
           .unwrap();

       // Store rooted in a temporary directory; `scratch` stays alive until the
       // end of the function.
       let scratch = TempDir::new().unwrap();
       let fs = LocalFileSystem::new_with_prefix(scratch.path()).unwrap();
       let location = Path::from("bench_file");

       // Fill the file with a deterministic, repeating 0..=255 byte pattern.
       let payload: Vec<u8> = (0..FILE_SIZE).map(|offset| (offset % 256) as u8).collect();
       runtime.block_on(async {
           fs.put(&location, payload.into()).await.unwrap();
       });

       let mut group = c.benchmark_group("read_ranges");

       for count in [10, 100, 1000] {
           let requests = generate_random_ranges(FILE_SIZE, RANGE_SIZE, count);

           // Each iteration reads `count` ranges of `RANGE_SIZE` bytes.
           group.throughput(Throughput::Bytes(count as u64 * RANGE_SIZE));

           let id = BenchmarkId::new("local_fs", count);
           group.bench_with_input(id, &requests, |bencher, requests| {
               bencher
                   .to_async(&runtime)
                   .iter(|| async { fs.get_ranges(&location, requests).await.unwrap() });
           });
       }

       group.finish();
   }
   
   // Register `bench_read_ranges` in the `benches` group and have Criterion
   // generate the benchmark binary's `main` for that group.
   criterion_group!(benches, bench_read_ranges);
   criterion_main!(benches);
   ```
   
   The improvement on my local MacBook looks like:
   ```
   read_ranges/local_fs/10 time:   [28.918 µs 29.101 µs 29.320 µs]
                           thrpt:  [2.6021 GiB/s 2.6217 GiB/s 2.6383 GiB/s]
                    change:
                           time:   [−13.736% −12.996% −12.275%] (p = 0.00 < 
0.05)
                           thrpt:  [+13.993% +14.937% +15.923%]
                           Performance has improved.
   Found 5 outliers among 100 measurements (5.00%)
     1 (1.00%) low mild
     3 (3.00%) high mild
     1 (1.00%) high severe
   read_ranges/local_fs/100
                           time:   [113.00 µs 114.28 µs 115.71 µs]
                           thrpt:  [6.5936 GiB/s 6.6763 GiB/s 6.7517 GiB/s]
                    change:
                           time:   [−21.183% −20.507% −19.814%] (p = 0.00 < 
0.05)
                           thrpt:  [+24.709% +25.798% +26.876%]
                           Performance has improved.
   Found 11 outliers among 100 measurements (11.00%)
     1 (1.00%) low mild
     3 (3.00%) high mild
     7 (7.00%) high severe
   Benchmarking read_ranges/local_fs/1000: Warming up for 3.0000 s
   Warning: Unable to complete 100 samples in 5.0s. You may wish to increase 
target time to 7.6s, enable flat sampling, or reduce sample count to 50.
   read_ranges/local_fs/1000
                           time:   [1.1890 ms 1.2145 ms 1.2437 ms]
                           thrpt:  [6.1346 GiB/s 6.2819 GiB/s 6.4165 GiB/s]
                    change:
                           time:   [−19.749% −17.230% −14.523%] (p = 0.00 < 
0.05)
                           thrpt:  [+16.991% +20.817% +24.609%]
                           Performance has improved.
   Found 5 outliers among 100 measurements (5.00%)
     5 (5.00%) high mild
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Only read file metadata once in `LocalFileSystem::read_ranges` [arrow-rs-object-store]

Reply via email to