jecsand838 commented on code in PR #8242: URL: https://github.com/apache/arrow-rs/pull/8242#discussion_r2308885235
########## arrow-avro/benches/decoder.rs: ########## @@ -369,42 +405,55 @@ const MIX_SCHEMA: &str = r#"{"type":"record","name":"MixRec","fields":[{"name":" const NEST_SCHEMA: &str = r#"{"type":"record","name":"NestRec","fields":[{"name":"sub","type":{"type":"record","name":"Sub","fields":[{"name":"x","type":"int"},{"name":"y","type":"string"}]}}]}"#; macro_rules! dataset { - ($name:ident, $schema_json:expr, $gen_fn:ident) => { + (@impl $name:ident, $schema_json:expr, $gen_fn:ident, $prefix_size:expr, $fingerprint_expr:expr) => { static $name: Lazy<Vec<Vec<u8>>> = Lazy::new(|| { let schema = ApacheSchema::parse_str($schema_json).expect("invalid schema for generator"); - let arrow_schema = AvroSchema::new($schema_json.to_string()); - let fingerprint = arrow_schema.fingerprint().expect("fingerprint failed"); - let prefix = make_prefix(fingerprint); + let fingerprint = $fingerprint_expr; + let prefix = make_prefix::<$prefix_size>(fingerprint); SIZES .iter() .map(|&n| $gen_fn(&schema, n, &prefix)) .collect() }); }; + // ID + ($name:ident, $schema_json:expr, $gen_fn:ident, $prefix_size:expr, $id:expr) => { + dataset!(@impl $name, $schema_json, $gen_fn, $prefix_size, Fingerprint::Id($id)); + }; + // Default + ($name:ident, $schema_json:expr, $gen_fn:ident, $prefix_size:expr) => { + dataset!(@impl $name, $schema_json, $gen_fn, $prefix_size, { + let arrow_schema = AvroSchema::new($schema_json.parse().unwrap()); + arrow_schema.fingerprint().expect("fingerprint failed") + }); + }; } -dataset!(INT_DATA, INT_SCHEMA, gen_int); -dataset!(LONG_DATA, LONG_SCHEMA, gen_long); -dataset!(FLOAT_DATA, FLOAT_SCHEMA, gen_float); -dataset!(BOOL_DATA, BOOL_SCHEMA, gen_bool); -dataset!(DOUBLE_DATA, DOUBLE_SCHEMA, gen_double); -dataset!(BYTES_DATA, BYTES_SCHEMA, gen_bytes); -dataset!(STRING_DATA, STRING_SCHEMA, gen_string); -dataset!(DATE_DATA, DATE_SCHEMA, gen_date); -dataset!(TMILLIS_DATA, TMILLIS_SCHEMA, gen_timemillis); -dataset!(TMICROS_DATA, TMICROS_SCHEMA, gen_timemicros); 
-dataset!(TSMILLIS_DATA, TSMILLIS_SCHEMA, gen_ts_millis); -dataset!(TSMICROS_DATA, TSMICROS_SCHEMA, gen_ts_micros); -dataset!(MAP_DATA, MAP_SCHEMA, gen_map); -dataset!(ARRAY_DATA, ARRAY_SCHEMA, gen_array); -dataset!(DECIMAL_DATA, DECIMAL_SCHEMA, gen_decimal); -dataset!(UUID_DATA, UUID_SCHEMA, gen_uuid); -dataset!(FIXED_DATA, FIXED_SCHEMA, gen_fixed); -dataset!(INTERVAL_DATA, INTERVAL_SCHEMA_ENCODE, gen_interval); -dataset!(ENUM_DATA, ENUM_SCHEMA, gen_enum); -dataset!(MIX_DATA, MIX_SCHEMA, gen_mixed); -dataset!(NEST_DATA, NEST_SCHEMA, gen_nested); +const ID_BENCH_ID: u32 = 7; + +dataset!(INT_DATA_ID, INT_SCHEMA, gen_int, 5, ID_BENCH_ID); +dataset!(INT_DATA, INT_SCHEMA, gen_int, 10); +dataset!(LONG_DATA, LONG_SCHEMA, gen_long, 10); +dataset!(FLOAT_DATA, FLOAT_SCHEMA, gen_float, 10); +dataset!(BOOL_DATA, BOOL_SCHEMA, gen_bool, 10); +dataset!(DOUBLE_DATA, DOUBLE_SCHEMA, gen_double, 10); +dataset!(BYTES_DATA, BYTES_SCHEMA, gen_bytes, 10); +dataset!(STRING_DATA, STRING_SCHEMA, gen_string, 10); +dataset!(DATE_DATA, DATE_SCHEMA, gen_date, 10); +dataset!(TMILLIS_DATA, TMILLIS_SCHEMA, gen_timemillis, 10); +dataset!(TMICROS_DATA, TMICROS_SCHEMA, gen_timemicros, 10); +dataset!(TSMILLIS_DATA, TSMILLIS_SCHEMA, gen_ts_millis, 10); +dataset!(TSMICROS_DATA, TSMICROS_SCHEMA, gen_ts_micros, 10); +dataset!(MAP_DATA, MAP_SCHEMA, gen_map, 10); +dataset!(ARRAY_DATA, ARRAY_SCHEMA, gen_array, 10); +dataset!(DECIMAL_DATA, DECIMAL_SCHEMA, gen_decimal, 10); +dataset!(UUID_DATA, UUID_SCHEMA, gen_uuid, 10); +dataset!(FIXED_DATA, FIXED_SCHEMA, gen_fixed, 10); +dataset!(INTERVAL_DATA, INTERVAL_SCHEMA_ENCODE, gen_interval, 10); +dataset!(ENUM_DATA, ENUM_SCHEMA, gen_enum, 10); +dataset!(MIX_DATA, MIX_SCHEMA, gen_mixed, 10); +dataset!(NEST_DATA, NEST_SCHEMA, gen_nested, 10); Review Comment: Then we can simplify this back to: ```suggestion dataset_id!(INT_DATA_ID, INT_SCHEMA, gen_int, ID_BENCH_ID); dataset!(INT_DATA, INT_SCHEMA, gen_int); dataset!(LONG_DATA, LONG_SCHEMA, gen_long); 
dataset!(FLOAT_DATA, FLOAT_SCHEMA, gen_float); dataset!(BOOL_DATA, BOOL_SCHEMA, gen_bool); dataset!(DOUBLE_DATA, DOUBLE_SCHEMA, gen_double); dataset!(BYTES_DATA, BYTES_SCHEMA, gen_bytes); dataset!(STRING_DATA, STRING_SCHEMA, gen_string); dataset!(DATE_DATA, DATE_SCHEMA, gen_date); dataset!(TMILLIS_DATA, TMILLIS_SCHEMA, gen_timemillis); dataset!(TMICROS_DATA, TMICROS_SCHEMA, gen_timemicros); dataset!(TSMILLIS_DATA, TSMILLIS_SCHEMA, gen_ts_millis); dataset!(TSMICROS_DATA, TSMICROS_SCHEMA, gen_ts_micros); dataset!(MAP_DATA, MAP_SCHEMA, gen_map); dataset!(ARRAY_DATA, ARRAY_SCHEMA, gen_array); dataset!(DECIMAL_DATA, DECIMAL_SCHEMA, gen_decimal); dataset!(UUID_DATA, UUID_SCHEMA, gen_uuid); dataset!(FIXED_DATA, FIXED_SCHEMA, gen_fixed); dataset!(INTERVAL_DATA, INTERVAL_SCHEMA_ENCODE, gen_interval); dataset!(ENUM_DATA, ENUM_SCHEMA, gen_enum); dataset!(MIX_DATA, MIX_SCHEMA, gen_mixed); dataset!(NEST_DATA, NEST_SCHEMA, gen_nested); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org