emkornfield commented on a change in pull request #8203:
URL: https://github.com/apache/arrow/pull/8203#discussion_r489554513
##########
File path: cpp/src/parquet/arrow/reader_writer_benchmark.cc
##########
@@ -280,6 +298,81 @@ BENCHMARK_TEMPLATE2(BM_ReadColumn, true, BooleanType)
->Args({kAlternatingOrNa, 1})
->Args({5, 10});
+//
+// Benchmark reading a nested column
+//
+
+static void BM_ReadStructColumn(::benchmark::State& state) {
+ constexpr int64_t kNumValues = BENCHMARK_SIZE / 10;
+ const double null_probability = static_cast<double>(state.range(0)) / 100.0;
+ const bool nullable = (null_probability != 0.0);
+
+ ARROW_CHECK_GE(null_probability, 0.0);
+
+ ::arrow::random::RandomArrayGenerator rng(42);
+
+ auto values1 = rng.Int32(kNumValues, -5, 5, null_probability);
+ auto values2 =
+ rng.Int64(kNumValues, -12345678912345LL, 12345678912345LL,
null_probability);
+
+ const int64_t kBytesPerValue = sizeof(int32_t) + sizeof(int64_t);
+
+ std::shared_ptr<::arrow::Buffer> null_bitmap;
+ if (nullable) {
+ null_bitmap = rng.NullBitmap(kNumValues, null_probability);
+ }
+ auto array = *::arrow::StructArray::Make(
+ {values1, values2},
+ ::arrow::FieldVector{field("a", values1->type(), nullable),
+ field("b", values2->type(), nullable)},
+ null_bitmap);
+ auto schema = ::arrow::schema({field("s", array->type(), nullable)});
+ auto table = ::arrow::Table::Make(schema, {array}, array->length());
+
+ EXIT_NOT_OK(table->Validate());
+
+ BenchmarkReadTable(state, *table, kNumValues, kBytesPerValue);
+}
+
+BENCHMARK(BM_ReadStructColumn)
+ ->Arg(/*null_percentage=*/0)
+ ->Arg(/*null_percentage=*/1)
+ ->Arg(/*null_percentage=*/50)
+ ->Arg(/*null_percentage=*/99);
+
+static void BM_ReadListColumn(::benchmark::State& state) {
+ constexpr int64_t kNumValues = BENCHMARK_SIZE / 10;
+ const double null_probability = static_cast<double>(state.range(0)) / 100.0;
+ const bool nullable = (null_probability != 0.0);
+
+ ARROW_CHECK_GE(null_probability, 0.0);
+
+ ::arrow::random::RandomArrayGenerator rng(42);
+
+ auto values = rng.Int64(kNumValues, -5, 5, null_probability);
+ auto offsets = rng.Offsets(kNumValues / 10, 0, values->length(),
null_probability);
Review comment:
So the lists get larger as the overall array size goes up?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]