mapleFU commented on code in PR #39705:
URL: https://github.com/apache/arrow/pull/39705#discussion_r1474632237
##########
cpp/src/parquet/column_reader_benchmark.cc:
##########
@@ -219,5 +219,99 @@ BENCHMARK(RecordReaderReadRecords)
->Args({2, 1000, true})
->Args({2, 1000, false});
+void GenerateLevels(int level_repeats, int max_level, int num_levels,
+ std::vector<int16_t>* levels) {
+ // Generate random levels
+ std::default_random_engine gen(/*seed=*/1943);
+ std::uniform_int_distribution<int16_t> d(0, max_level);
+ for (int i = 0; i < num_levels;) {
+ int16_t current_level = d(gen); // level repeat `level_repeats` times
+ const int current_repeated = std::min(level_repeats, num_levels - i);
+ levels->insert(levels->end(), current_repeated, current_level);
+ i += current_repeated;
+ }
+}
+
+void EncodeLevels(Encoding::type encoding, int16_t max_level, int num_levels,
+ const int16_t* input_levels, std::vector<uint8_t>* bytes) {
+ LevelEncoder encoder;
+ bytes->resize(2 * num_levels);
+ // encode levels
+ if (encoding == Encoding::RLE) {
+ // leave space to write the rle length value
+ encoder.Init(encoding, max_level, num_levels, bytes->data() + sizeof(int32_t),
+ static_cast<int>(bytes->size()));
+
+ encoder.Encode(num_levels, input_levels);
+ (reinterpret_cast<int32_t*>(bytes->data()))[0] = encoder.len();
+ } else {
+ encoder.Init(encoding, max_level, num_levels, bytes->data(),
+ static_cast<int>(bytes->size()));
+ encoder.Encode(num_levels, input_levels);
+ }
+}
+
+static void DecodeLevels(Encoding::type level_encoding, int16_t max_level, int num_levels,
+ int batch_size, int level_repeat_count,
+ ::benchmark::State& state) {
+ std::vector<uint8_t> bytes;
+ {
+ std::vector<int16_t> input_levels;
+ GenerateLevels(/*level_repeats=*/level_repeat_count, /*max_repeat_factor=*/max_level,
+ num_levels, &input_levels);
+ EncodeLevels(level_encoding, max_level, num_levels, input_levels.data(), &bytes);
+ }
+
+ LevelDecoder decoder;
+ std::vector<int16_t> output_levels(num_levels);
Review Comment:
Hmm, during reading, aren't they both important?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]