felipecrv commented on code in PR #43772:
URL: https://github.com/apache/arrow/pull/43772#discussion_r1723727164


##########
cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc:
##########
@@ -198,29 +209,41 @@ struct TakeBenchmark {
     for (auto _ : state) {
       ABORT_NOT_OK(Take(values, indices).status());
     }
-    state.SetItemsProcessed(state.iterations() * values->length());
+    state.SetItemsProcessed(state.iterations() * num_indices);
+    state.counters["selection_factor"] = selection_factor;
   }
 
   void BenchChunked(const std::shared_ptr<ChunkedArray>& values, bool 
chunk_indices_too) {
     double indices_null_proportion = indices_have_nulls ? args.null_proportion 
: 0;
-    auto indices =
-        rand.Int32(values->length(), 0, static_cast<int32_t>(values->length() 
- 1),
-                   indices_null_proportion);
+    const int64_t num_indices = static_cast<int64_t>(selection_factor * 
values->length());
+    auto indices = rand.Int32(num_indices, 0, 
static_cast<int32_t>(values->length() - 1),
+                              indices_null_proportion);
 
     if (monotonic_indices) {
       auto arg_sorter = *SortIndices(*indices);
       indices = *Take(*indices, *arg_sorter);
     }
     std::shared_ptr<ChunkedArray> chunked_indices;
     if (chunk_indices_too) {
+      // Here we choose for indices chunks to have roughly the same length
+      // as values chunks, but there may be fewer of them if selection_factor < 1.0.
+      // The alternative is to have the same number of chunks, but with a potentially
+      // much smaller (and unrealistic) length.

Review Comment:
   But you should guarantee at least 2 chunks.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to