jecsand838 commented on code in PR #8242: URL: https://github.com/apache/arrow-rs/pull/8242#discussion_r2308891173
########## arrow-avro/benches/decoder.rs: ########## @@ -447,6 +496,48 @@ fn bench_scenario( group.finish(); } +fn bench_scenario_id( + c: &mut Criterion, + name: &str, + schema_json: &'static str, + data_sets: &[Vec<u8>], + utf8view: bool, + batch_size: usize, + id: u32, +) { + let mut group = c.benchmark_group(name); + for (idx, &rows) in SIZES.iter().enumerate() { + let datum = &data_sets[idx]; + group.throughput(Throughput::Bytes(datum.len() as u64)); + match rows { + 10_000 => { + group + .sample_size(25) + .measurement_time(Duration::from_secs(10)) + .warm_up_time(Duration::from_secs(3)); + } + 1_000_000 => { + group + .sample_size(10) + .measurement_time(Duration::from_secs(10)) + .warm_up_time(Duration::from_secs(3)); + } + _ => {} + } + group.bench_function(BenchmarkId::from_parameter(rows), |b| { + b.iter_batched_ref( + || new_decoder_id(schema_json, batch_size, utf8view, id), + |decoder| { + black_box(decoder.decode(datum).unwrap()); + black_box(decoder.flush().unwrap().unwrap()); + }, + BatchSize::SmallInput, + ) + }); + } + group.finish(); +} Review Comment: There's alot of duplication between `bench_scenario_id` and `bench_scenario`. I'd recommend unifying them with a builder closure along these lines: ```suggestion fn bench_with_decoder<F>( c: &mut Criterion, name: &str, data_sets: &[Vec<u8>], batch_size: usize, rows: &[usize], mut new_decoder: F, ) where F: FnMut() -> arrow_avro::reader::Decoder, { let mut group = c.benchmark_group(name); for (idx, &row_count) in rows.iter().enumerate() { let datum = &data_sets[idx]; group.throughput(Throughput::Bytes(datum.len() as u64)); match row_count { 10_000 => { group .sample_size(25) .measurement_time(Duration::from_secs(10)) .warm_up_time(Duration::from_secs(3)); } 1_000_000 => { group .sample_size(10) .measurement_time(Duration::from_secs(10)) .warm_up_time(Duration::from_secs(3)); } _ => {} } group.bench_function(BenchmarkId::from_parameter(row_count), |b| { b.iter_batched_ref( || new_decoder(), |decoder| { black_box(decoder.decode(datum).unwrap()); black_box(decoder.flush().unwrap().unwrap()); }, BatchSize::SmallInput, ) }); } group.finish(); } ``` Then you'd be able to use `bench_with_decoder` below like this: ```rust bench_with_decoder( c, "Int32", &INT_DATA, batch_size, &SIZES, || new_decoder(INT_SCHEMA, batch_size, false), ); bench_with_decoder( c, "Int32_Id", &INT_DATA_ID, batch_size, &SIZES, || new_decoder_id(INT_SCHEMA, batch_size, false, ID_BENCH_ID), ); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org