tustvold commented on a change in pull request #1284:
URL: https://github.com/apache/arrow-rs/pull/1284#discussion_r805164905
##########
File path: parquet/benches/arrow_reader.rs
##########
@@ -351,162 +379,224 @@ fn create_complex_object_byte_array_dictionary_reader(
)
}
-fn add_benches(c: &mut Criterion) {
- const EXPECTED_VALUE_COUNT: usize =
- NUM_ROW_GROUPS * PAGES_PER_GROUP * VALUES_PER_PAGE;
- let mut group = c.benchmark_group("arrow_array_reader");
-
+fn bench_primitive<T>(
+ group: &mut BenchmarkGroup<WallTime>,
+ schema: &SchemaDescPtr,
+ mandatory_column_desc: &ColumnDescPtr,
+ optional_column_desc: &ColumnDescPtr,
+) where
+ T: parquet::data_type::DataType,
+ T::T: SampleUniform + FromPrimitive + Copy,
+{
let mut count: usize = 0;
- let schema = build_test_schema();
- let mandatory_int32_column_desc = schema.column(0);
- let optional_int32_column_desc = schema.column(1);
- let mandatory_string_column_desc = schema.column(2);
- let optional_string_column_desc = schema.column(3);
- // primitive / int32 benchmarks
- // =============================
-
- // int32, plain encoded, no NULLs
- let plain_int32_no_null_data = build_plain_encoded_int32_page_iterator(
+ // plain encoded, no NULLs
+ let data = build_encoded_primitive_page_iterator::<T>(
schema.clone(),
- mandatory_int32_column_desc.clone(),
+ mandatory_column_desc.clone(),
0.0,
+ Encoding::PLAIN,
);
- group.bench_function("read Int32Array, plain encoded, mandatory, no
NULLs", |b| {
+ group.bench_function("plain encoded, mandatory, no NULLs", |b| {
b.iter(|| {
- let array_reader = create_int32_primitive_array_reader(
- plain_int32_no_null_data.clone(),
- mandatory_int32_column_desc.clone(),
+ let array_reader = create_primitive_array_reader(
+ data.clone(),
+ mandatory_column_desc.clone(),
);
count = bench_array_reader(array_reader);
});
assert_eq!(count, EXPECTED_VALUE_COUNT);
});
- let plain_int32_no_null_data = build_plain_encoded_int32_page_iterator(
+ let data = build_encoded_primitive_page_iterator::<T>(
schema.clone(),
- optional_int32_column_desc.clone(),
+ optional_column_desc.clone(),
0.0,
+ Encoding::PLAIN,
);
- group.bench_function("read Int32Array, plain encoded, optional, no NULLs",
|b| {
+ group.bench_function("plain encoded, optional, no NULLs", |b| {
b.iter(|| {
- let array_reader = create_int32_primitive_array_reader(
- plain_int32_no_null_data.clone(),
- optional_int32_column_desc.clone(),
- );
+ let array_reader =
+ create_primitive_array_reader(data.clone(),
optional_column_desc.clone());
count = bench_array_reader(array_reader);
});
assert_eq!(count, EXPECTED_VALUE_COUNT);
});
- // int32, plain encoded, half NULLs
- let plain_int32_half_null_data = build_plain_encoded_int32_page_iterator(
+ // plain encoded, half NULLs
+ let data = build_encoded_primitive_page_iterator::<T>(
schema.clone(),
- optional_int32_column_desc.clone(),
+ optional_column_desc.clone(),
0.5,
+ Encoding::PLAIN,
);
- group.bench_function(
- "read Int32Array, plain encoded, optional, half NULLs",
- |b| {
- b.iter(|| {
- let array_reader = create_int32_primitive_array_reader(
- plain_int32_half_null_data.clone(),
- optional_int32_column_desc.clone(),
- );
- count = bench_array_reader(array_reader);
- });
- assert_eq!(count, EXPECTED_VALUE_COUNT);
- },
+ group.bench_function("plain encoded, optional, half NULLs", |b| {
+ b.iter(|| {
+ let array_reader =
+ create_primitive_array_reader(data.clone(),
optional_column_desc.clone());
+ count = bench_array_reader(array_reader);
+ });
+ assert_eq!(count, EXPECTED_VALUE_COUNT);
+ });
+
+ // binary packed, no NULLs
+ let data = build_encoded_primitive_page_iterator::<T>(
+ schema.clone(),
+ mandatory_column_desc.clone(),
+ 0.0,
+ Encoding::DELTA_BINARY_PACKED,
);
+ group.bench_function("binary packed, mandatory, no NULLs", |b| {
+ b.iter(|| {
+ let array_reader = create_primitive_array_reader(
+ data.clone(),
+ mandatory_column_desc.clone(),
+ );
+ count = bench_array_reader(array_reader);
+ });
+ assert_eq!(count, EXPECTED_VALUE_COUNT);
+ });
- // int32, dictionary encoded, no NULLs
- let dictionary_int32_no_null_data =
build_dictionary_encoded_int32_page_iterator(
+ let data = build_encoded_primitive_page_iterator::<T>(
schema.clone(),
- mandatory_int32_column_desc.clone(),
+ optional_column_desc.clone(),
0.0,
+ Encoding::DELTA_BINARY_PACKED,
);
- group.bench_function(
- "read Int32Array, dictionary encoded, mandatory, no NULLs",
- |b| {
- b.iter(|| {
- let array_reader = create_int32_primitive_array_reader(
- dictionary_int32_no_null_data.clone(),
- mandatory_int32_column_desc.clone(),
- );
- count = bench_array_reader(array_reader);
- });
- assert_eq!(count, EXPECTED_VALUE_COUNT);
- },
+ group.bench_function("binary packed, optional, no NULLs", |b| {
+ b.iter(|| {
+ let array_reader =
+ create_primitive_array_reader(data.clone(),
optional_column_desc.clone());
+ count = bench_array_reader(array_reader);
+ });
+ assert_eq!(count, EXPECTED_VALUE_COUNT);
+ });
+
+ // binary packed, half NULLs
+ let data = build_encoded_primitive_page_iterator::<T>(
+ schema.clone(),
+ optional_column_desc.clone(),
+ 0.5,
+ Encoding::DELTA_BINARY_PACKED,
);
+ group.bench_function("binary packed, optional, half NULLs", |b| {
+ b.iter(|| {
+ let array_reader =
+ create_primitive_array_reader(data.clone(),
optional_column_desc.clone());
+ count = bench_array_reader(array_reader);
+ });
+ assert_eq!(count, EXPECTED_VALUE_COUNT);
+ });
- let dictionary_int32_no_null_data =
build_dictionary_encoded_int32_page_iterator(
+ // dictionary encoded, no NULLs
+ let data = build_dictionary_encoded_primitive_page_iterator::<T>(
schema.clone(),
- optional_int32_column_desc.clone(),
+ mandatory_column_desc.clone(),
0.0,
);
- group.bench_function(
- "read Int32Array, dictionary encoded, optional, no NULLs",
- |b| {
- b.iter(|| {
- let array_reader = create_int32_primitive_array_reader(
- dictionary_int32_no_null_data.clone(),
- optional_int32_column_desc.clone(),
- );
- count = bench_array_reader(array_reader);
- });
- assert_eq!(count, EXPECTED_VALUE_COUNT);
- },
+ group.bench_function("dictionary encoded, mandatory, no NULLs", |b| {
+ b.iter(|| {
+ let array_reader = create_primitive_array_reader(
+ data.clone(),
+ mandatory_column_desc.clone(),
+ );
+ count = bench_array_reader(array_reader);
+ });
+ assert_eq!(count, EXPECTED_VALUE_COUNT);
+ });
+
+ let data = build_dictionary_encoded_primitive_page_iterator::<T>(
+ schema.clone(),
+ optional_column_desc.clone(),
+ 0.0,
);
+ group.bench_function("dictionary encoded, optional, no NULLs", |b| {
+ b.iter(|| {
+ let array_reader =
+ create_primitive_array_reader(data.clone(),
optional_column_desc.clone());
+ count = bench_array_reader(array_reader);
+ });
+ assert_eq!(count, EXPECTED_VALUE_COUNT);
+ });
- // int32, dictionary encoded, half NULLs
- let dictionary_int32_half_null_data =
build_dictionary_encoded_int32_page_iterator(
+ // dictionary encoded, half NULLs
+ let data = build_dictionary_encoded_primitive_page_iterator::<T>(
schema.clone(),
- optional_int32_column_desc.clone(),
+ optional_column_desc.clone(),
0.5,
);
- group.bench_function(
- "read Int32Array, dictionary encoded, optional, half NULLs",
- |b| {
- b.iter(|| {
- let array_reader = create_int32_primitive_array_reader(
- dictionary_int32_half_null_data.clone(),
- optional_int32_column_desc.clone(),
- );
- count = bench_array_reader(array_reader);
- });
- assert_eq!(count, EXPECTED_VALUE_COUNT);
- },
+ group.bench_function("dictionary encoded, optional, half NULLs", |b| {
+ b.iter(|| {
+ let array_reader =
+ create_primitive_array_reader(data.clone(),
optional_column_desc.clone());
+ count = bench_array_reader(array_reader);
+ });
+ assert_eq!(count, EXPECTED_VALUE_COUNT);
+ });
+}
+
+fn add_benches(c: &mut Criterion) {
+ let mut count: usize = 0;
+
+ let schema = build_test_schema();
+ let mandatory_int32_column_desc = schema.column(0);
+ let optional_int32_column_desc = schema.column(1);
+ let mandatory_string_column_desc = schema.column(2);
+ let optional_string_column_desc = schema.column(3);
+ let mandatory_int64_column_desc = schema.column(4);
+ let optional_int64_column_desc = schema.column(5);
+ // primitive / int32 benchmarks
+ // =============================
+
+ let mut group = c.benchmark_group("arrow_array_reader/Int32Array");
+ bench_primitive::<Int32Type>(
+ &mut group,
+ &schema,
+ &mandatory_int32_column_desc,
+ &optional_int32_column_desc,
+ );
+ group.finish();
+
+ // primitive / int64 benchmarks
+ // =============================
+
+ let mut group = c.benchmark_group("arrow_array_reader/Int64Array");
+ bench_primitive::<Int64Type>(
+ &mut group,
+ &schema,
+ &mandatory_int64_column_desc,
+ &optional_int64_column_desc,
);
+ group.finish();
// string benchmarks
//==============================
+ let mut group = c.benchmark_group("arrow_array_reader/StringArray");
+
// string, plain encoded, no NULLs
let plain_string_no_null_data = build_plain_encoded_string_page_iterator(
schema.clone(),
mandatory_string_column_desc.clone(),
0.0,
);
- group.bench_function(
- "read StringArray, plain encoded, mandatory, no NULLs",
- |b| {
- b.iter(|| {
- let array_reader = create_string_byte_array_reader(
- plain_string_no_null_data.clone(),
- mandatory_string_column_desc.clone(),
- );
- count = bench_array_reader(array_reader);
- });
- assert_eq!(count, EXPECTED_VALUE_COUNT);
- },
- );
+ group.bench_function("plain encoded, mandatory, no NULLs", |b| {
Review comment:
The type, i.e. `StringArray`, is now encoded in the group name, so it no longer needs to be repeated in each benchmark's name.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]