pitrou commented on code in PR #36298:
URL: https://github.com/apache/arrow/pull/36298#discussion_r1243951959


##########
cpp/src/parquet/encoding_benchmark.cc:
##########
@@ -782,6 +782,27 @@ static void BM_RleDecodingSpacedBoolean(benchmark::State& 
state) {
 }
 BENCHMARK(BM_RleDecodingSpacedBoolean)->Apply(BM_SpacedArgs);
 
+template <typename Type>
+static void EncodeDict(std::vector<typename Type::c_type>& values,

Review Comment:
   If `values` is not modified, please pass it by const-ref; otherwise pass a non-const pointer.



##########
cpp/src/parquet/encoding_benchmark.cc:
##########
@@ -782,6 +782,27 @@ static void BM_RleDecodingSpacedBoolean(benchmark::State& 
state) {
 }
 BENCHMARK(BM_RleDecodingSpacedBoolean)->Apply(BM_SpacedArgs);
 
+template <typename Type>
+static void EncodeDict(std::vector<typename Type::c_type>& values,
+                       benchmark::State& state) {
+  using T = typename Type::c_type;
+  int num_values = static_cast<int>(values.size());
+
+  MemoryPool* allocator = default_memory_pool();
+  std::shared_ptr<ColumnDescriptor> descr = Int64Schema(Repetition::REQUIRED);
+
+  auto base_encoder = MakeEncoder(Type::type_num, Encoding::RLE_DICTIONARY,
+                                  /*use_dictionary=*/true, descr.get(), 
allocator);
+  auto encoder =
+      dynamic_cast<typename 
EncodingTraits<Type>::Encoder*>(base_encoder.get());
+  for (auto _ : state) {
+    encoder->Put(values.data(), num_values);
+    encoder->FlushValues();
+  }
+
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(T));

Review Comment:
   Can you also add a `SetItemsProcessed` call here, and possibly in the other benchmark functions?



##########
cpp/src/parquet/encoding_benchmark.cc:
##########
@@ -847,6 +878,19 @@ static void 
BM_DictDecodingInt64_literals(benchmark::State& state) {
 
 BENCHMARK(BM_DictDecodingInt64_literals)->Range(MIN_RANGE, MAX_RANGE);
 
+static void BM_DictEncodingInt64_literals(benchmark::State& state) {
+  using Type = Int64Type;
+  using T = typename Type::c_type;
+
+  std::vector<T> values(state.range(0));
+  for (size_t i = 0; i < values.size(); ++i) {

Review Comment:
   Nit: you can use `std::iota` here and above.



##########
cpp/src/parquet/encoding_benchmark.cc:
##########
@@ -782,6 +782,27 @@ static void BM_RleDecodingSpacedBoolean(benchmark::State& 
state) {
 }
 BENCHMARK(BM_RleDecodingSpacedBoolean)->Apply(BM_SpacedArgs);
 
+template <typename Type>
+static void EncodeDict(std::vector<typename Type::c_type>& values,
+                       benchmark::State& state) {
+  using T = typename Type::c_type;
+  int num_values = static_cast<int>(values.size());
+
+  MemoryPool* allocator = default_memory_pool();
+  std::shared_ptr<ColumnDescriptor> descr = Int64Schema(Repetition::REQUIRED);
+
+  auto base_encoder = MakeEncoder(Type::type_num, Encoding::RLE_DICTIONARY,
+                                  /*use_dictionary=*/true, descr.get(), 
allocator);
+  auto encoder =
+      dynamic_cast<typename 
EncodingTraits<Type>::Encoder*>(base_encoder.get());
+  for (auto _ : state) {
+    encoder->Put(values.data(), num_values);
+    encoder->FlushValues();
+  }
+
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(T));

Review Comment:
   And by the way:
   ```suggestion
     state.SetBytesProcessed(state.iterations() * num_values * sizeof(T));
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to