Re: [PR] Adds Confluent wire format handling to arrow-avro crate [arrow-rs]

via GitHub Thu, 28 Aug 2025 18:22:07 -0700


jecsand838 commented on code in PR #8242:
URL: https://github.com/apache/arrow-rs/pull/8242#discussion_r2308891173



##########
arrow-avro/benches/decoder.rs:
##########
@@ -447,6 +496,48 @@ fn bench_scenario(
     group.finish();
 }
 
+fn bench_scenario_id(
+    c: &mut Criterion,
+    name: &str,
+    schema_json: &'static str,
+    data_sets: &[Vec<u8>],
+    utf8view: bool,
+    batch_size: usize,
+    id: u32,
+) {
+    let mut group = c.benchmark_group(name);
+    for (idx, &rows) in SIZES.iter().enumerate() {
+        let datum = &data_sets[idx];
+        group.throughput(Throughput::Bytes(datum.len() as u64));
+        match rows {
+            10_000 => {
+                group
+                    .sample_size(25)
+                    .measurement_time(Duration::from_secs(10))
+                    .warm_up_time(Duration::from_secs(3));
+            }
+            1_000_000 => {
+                group
+                    .sample_size(10)
+                    .measurement_time(Duration::from_secs(10))
+                    .warm_up_time(Duration::from_secs(3));
+            }
+            _ => {}
+        }
+        group.bench_function(BenchmarkId::from_parameter(rows), |b| {
+            b.iter_batched_ref(
+                || new_decoder_id(schema_json, batch_size, utf8view, id),
+                |decoder| {
+                    black_box(decoder.decode(datum).unwrap());
+                    black_box(decoder.flush().unwrap().unwrap());
+                },
+                BatchSize::SmallInput,
+            )
+        });
+    }
+    group.finish();
+}

Review Comment:
   There's alot of duplication between `bench_scenario_id` and 
`bench_scenario`. 
   
   I'd recommend unifying them with a builder closure along these lines:
   ```suggestion
   fn bench_with_decoder<F>(
       c: &mut Criterion,
       name: &str,
       data_sets: &[Vec<u8>],
       batch_size: usize,
       rows: &[usize],
       mut new_decoder: F,
   ) where
       F: FnMut() -> arrow_avro::reader::Decoder,
   {
       let mut group = c.benchmark_group(name);
       for (idx, &row_count) in rows.iter().enumerate() {
           let datum = &data_sets[idx];
           group.throughput(Throughput::Bytes(datum.len() as u64));
           match row_count {
               10_000 => {
                   group
                       .sample_size(25)
                       .measurement_time(Duration::from_secs(10))
                       .warm_up_time(Duration::from_secs(3));
               }
               1_000_000 => {
                   group
                       .sample_size(10)
                       .measurement_time(Duration::from_secs(10))
                       .warm_up_time(Duration::from_secs(3));
               }
               _ => {}
           }
           group.bench_function(BenchmarkId::from_parameter(row_count), |b| {
               b.iter_batched_ref(
                   || new_decoder(),
                   |decoder| {
                       black_box(decoder.decode(datum).unwrap());
                       black_box(decoder.flush().unwrap().unwrap());
                   },
                   BatchSize::SmallInput,
               )
           });
       }
       group.finish();
   }
   ```
   
   Then you'd be able to use `bench_with_decoder` below like this:
   
   ```rust
   bench_with_decoder(
       c, 
       "Int32",
       &INT_DATA, 
       batch_size, 
       &SIZES,
       || new_decoder(INT_SCHEMA, batch_size, false),
   );
   
   bench_with_decoder(
       c, 
       "Int32_Id",
       &INT_DATA_ID, 
       batch_size, 
       &SIZES,
       || new_decoder_id(INT_SCHEMA, batch_size, false, ID_BENCH_ID),
   );
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Re: [PR] Adds Confluent wire format handling to arrow-avro crate [arrow-rs]

Reply via email to