jecsand838 commented on code in PR #8242:
URL: https://github.com/apache/arrow-rs/pull/8242#discussion_r2308885235


##########
arrow-avro/benches/decoder.rs:
##########
@@ -369,42 +405,55 @@ const MIX_SCHEMA: &str = r#"{"type":"record","name":"MixRec","fields":[{"name":"
 const NEST_SCHEMA: &str = r#"{"type":"record","name":"NestRec","fields":[{"name":"sub","type":{"type":"record","name":"Sub","fields":[{"name":"x","type":"int"},{"name":"y","type":"string"}]}}]}"#;
 
 macro_rules! dataset {
-    ($name:ident, $schema_json:expr, $gen_fn:ident) => {
+    (@impl $name:ident, $schema_json:expr, $gen_fn:ident, $prefix_size:expr, $fingerprint_expr:expr) => {
         static $name: Lazy<Vec<Vec<u8>>> = Lazy::new(|| {
             let schema =
                 ApacheSchema::parse_str($schema_json).expect("invalid schema for generator");
-            let arrow_schema = AvroSchema::new($schema_json.to_string());
-            let fingerprint = arrow_schema.fingerprint().expect("fingerprint failed");
-            let prefix = make_prefix(fingerprint);
+            let fingerprint = $fingerprint_expr;
+            let prefix = make_prefix::<$prefix_size>(fingerprint);
             SIZES
                 .iter()
                 .map(|&n| $gen_fn(&schema, n, &prefix))
                 .collect()
         });
     };
+    // ID
+    ($name:ident, $schema_json:expr, $gen_fn:ident, $prefix_size:expr, $id:expr) => {
+        dataset!(@impl $name, $schema_json, $gen_fn, $prefix_size, Fingerprint::Id($id));
+    };
+    // Default
+    ($name:ident, $schema_json:expr, $gen_fn:ident, $prefix_size:expr) => {
+        dataset!(@impl $name, $schema_json, $gen_fn, $prefix_size, {
+            let arrow_schema = AvroSchema::new($schema_json.parse().unwrap());
+            arrow_schema.fingerprint().expect("fingerprint failed")
+        });
+    };
 }
 
-dataset!(INT_DATA, INT_SCHEMA, gen_int);
-dataset!(LONG_DATA, LONG_SCHEMA, gen_long);
-dataset!(FLOAT_DATA, FLOAT_SCHEMA, gen_float);
-dataset!(BOOL_DATA, BOOL_SCHEMA, gen_bool);
-dataset!(DOUBLE_DATA, DOUBLE_SCHEMA, gen_double);
-dataset!(BYTES_DATA, BYTES_SCHEMA, gen_bytes);
-dataset!(STRING_DATA, STRING_SCHEMA, gen_string);
-dataset!(DATE_DATA, DATE_SCHEMA, gen_date);
-dataset!(TMILLIS_DATA, TMILLIS_SCHEMA, gen_timemillis);
-dataset!(TMICROS_DATA, TMICROS_SCHEMA, gen_timemicros);
-dataset!(TSMILLIS_DATA, TSMILLIS_SCHEMA, gen_ts_millis);
-dataset!(TSMICROS_DATA, TSMICROS_SCHEMA, gen_ts_micros);
-dataset!(MAP_DATA, MAP_SCHEMA, gen_map);
-dataset!(ARRAY_DATA, ARRAY_SCHEMA, gen_array);
-dataset!(DECIMAL_DATA, DECIMAL_SCHEMA, gen_decimal);
-dataset!(UUID_DATA, UUID_SCHEMA, gen_uuid);
-dataset!(FIXED_DATA, FIXED_SCHEMA, gen_fixed);
-dataset!(INTERVAL_DATA, INTERVAL_SCHEMA_ENCODE, gen_interval);
-dataset!(ENUM_DATA, ENUM_SCHEMA, gen_enum);
-dataset!(MIX_DATA, MIX_SCHEMA, gen_mixed);
-dataset!(NEST_DATA, NEST_SCHEMA, gen_nested);
+const ID_BENCH_ID: u32 = 7;
+
+dataset!(INT_DATA_ID, INT_SCHEMA, gen_int, 5, ID_BENCH_ID);
+dataset!(INT_DATA, INT_SCHEMA, gen_int, 10);
+dataset!(LONG_DATA, LONG_SCHEMA, gen_long, 10);
+dataset!(FLOAT_DATA, FLOAT_SCHEMA, gen_float, 10);
+dataset!(BOOL_DATA, BOOL_SCHEMA, gen_bool, 10);
+dataset!(DOUBLE_DATA, DOUBLE_SCHEMA, gen_double, 10);
+dataset!(BYTES_DATA, BYTES_SCHEMA, gen_bytes, 10);
+dataset!(STRING_DATA, STRING_SCHEMA, gen_string, 10);
+dataset!(DATE_DATA, DATE_SCHEMA, gen_date, 10);
+dataset!(TMILLIS_DATA, TMILLIS_SCHEMA, gen_timemillis, 10);
+dataset!(TMICROS_DATA, TMICROS_SCHEMA, gen_timemicros, 10);
+dataset!(TSMILLIS_DATA, TSMILLIS_SCHEMA, gen_ts_millis, 10);
+dataset!(TSMICROS_DATA, TSMICROS_SCHEMA, gen_ts_micros, 10);
+dataset!(MAP_DATA, MAP_SCHEMA, gen_map, 10);
+dataset!(ARRAY_DATA, ARRAY_SCHEMA, gen_array, 10);
+dataset!(DECIMAL_DATA, DECIMAL_SCHEMA, gen_decimal, 10);
+dataset!(UUID_DATA, UUID_SCHEMA, gen_uuid, 10);
+dataset!(FIXED_DATA, FIXED_SCHEMA, gen_fixed, 10);
+dataset!(INTERVAL_DATA, INTERVAL_SCHEMA_ENCODE, gen_interval, 10);
+dataset!(ENUM_DATA, ENUM_SCHEMA, gen_enum, 10);
+dataset!(MIX_DATA, MIX_SCHEMA, gen_mixed, 10);
+dataset!(NEST_DATA, NEST_SCHEMA, gen_nested, 10);

Review Comment:
   Then we can simplify this back to:
   
   ```suggestion
   dataset_id!(INT_DATA_ID, INT_SCHEMA, gen_int, ID_BENCH_ID);
   dataset!(INT_DATA, INT_SCHEMA, gen_int);
   dataset!(LONG_DATA, LONG_SCHEMA, gen_long);
   dataset!(FLOAT_DATA, FLOAT_SCHEMA, gen_float);
   dataset!(BOOL_DATA, BOOL_SCHEMA, gen_bool);
   dataset!(DOUBLE_DATA, DOUBLE_SCHEMA, gen_double);
   dataset!(BYTES_DATA, BYTES_SCHEMA, gen_bytes);
   dataset!(STRING_DATA, STRING_SCHEMA, gen_string);
   dataset!(DATE_DATA, DATE_SCHEMA, gen_date);
   dataset!(TMILLIS_DATA, TMILLIS_SCHEMA, gen_timemillis);
   dataset!(TMICROS_DATA, TMICROS_SCHEMA, gen_timemicros);
   dataset!(TSMILLIS_DATA, TSMILLIS_SCHEMA, gen_ts_millis);
   dataset!(TSMICROS_DATA, TSMICROS_SCHEMA, gen_ts_micros);
   dataset!(MAP_DATA, MAP_SCHEMA, gen_map);
   dataset!(ARRAY_DATA, ARRAY_SCHEMA, gen_array);
   dataset!(DECIMAL_DATA, DECIMAL_SCHEMA, gen_decimal);
   dataset!(UUID_DATA, UUID_SCHEMA, gen_uuid);
   dataset!(FIXED_DATA, FIXED_SCHEMA, gen_fixed);
   dataset!(INTERVAL_DATA, INTERVAL_SCHEMA_ENCODE, gen_interval);
   dataset!(ENUM_DATA, ENUM_SCHEMA, gen_enum);
   dataset!(MIX_DATA, MIX_SCHEMA, gen_mixed);
   dataset!(NEST_DATA, NEST_SCHEMA, gen_nested);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to