emkornfield commented on code in PR #39705:
URL: https://github.com/apache/arrow/pull/39705#discussion_r1462341558
##########
cpp/src/parquet/column_reader_benchmark.cc:
##########
@@ -219,5 +219,87 @@ BENCHMARK(RecordReaderReadRecords)
->Args({2, 1000, true})
->Args({2, 1000, false});
+void GenerateLevels(int level_repeats, int max_level, int num_levels,
+ std::vector<int16_t>& input_levels) {
+ // Generate random levels
+ std::default_random_engine gen(/*seed=*/1943);
+ std::uniform_int_distribution<int16_t> d(0, max_level);
+ for (int i = 0; i < num_levels;) {
+ int16_t current_level = d(gen); // level repeat `level_repeats` times
+ for (int j = 0; j < level_repeats; ++j) {
+ input_levels.push_back(current_level);
+ ++i;
+ }
+ }
+}
+
+void EncodeLevels(Encoding::type encoding, int16_t max_level, int num_levels,
+ const int16_t* input_levels, std::vector<uint8_t>& bytes) {
+ LevelEncoder encoder;
+ int levels_count = 0;
+ bytes.resize(2 * num_levels);
+ ASSERT_EQ(2 * num_levels, static_cast<int>(bytes.size()));
+ // encode levels
+ if (encoding == Encoding::RLE) {
+ // leave space to write the rle length value
+ encoder.Init(encoding, max_level, num_levels, bytes.data() +
sizeof(int32_t),
+ static_cast<int>(bytes.size()));
+
+ levels_count = encoder.Encode(num_levels, input_levels);
+ (reinterpret_cast<int32_t*>(bytes.data()))[0] = encoder.len();
+ } else {
+ encoder.Init(encoding, max_level, num_levels, bytes.data(),
+ static_cast<int>(bytes.size()));
+ levels_count = encoder.Encode(num_levels, input_levels);
+ }
+ ASSERT_EQ(num_levels, levels_count);
+}
+
+static void DecodeLevels(Encoding::type level_encoding, int16_t max_level, int
num_levels,
+ int batch_size, int level_repeat_count,
+ ::benchmark::State& state) {
+ std::vector<uint8_t> bytes;
+ {
+ std::vector<int16_t> input_levels;
+ GenerateLevels(/*level_repeats=*/level_repeat_count,
/*max_repeat_factor=*/max_level,
+ num_levels, input_levels);
+ // Print generated levels
+ EncodeLevels(level_encoding, max_level, num_levels, input_levels.data(),
bytes);
+ }
+
+ LevelDecoder decoder;
+ std::vector<int16_t> output_levels(num_levels);
+ for (auto _ : state) {
+ decoder.SetData(level_encoding, max_level, num_levels, bytes.data(),
Review Comment:
just want to check that we want to include this in timing (I think it is
cheap, but I guess a benchmark should show whether it is or isn't).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]