westonpace commented on a change in pull request #11616:
URL: https://github.com/apache/arrow/pull/11616#discussion_r766936170



##########
File path: cpp/src/arrow/ipc/read_write_benchmark.cc
##########
@@ -49,9 +51,29 @@ std::shared_ptr<RecordBatch> MakeRecordBatch(int64_t 
total_size, int64_t num_fie
   return RecordBatch::Make(schema, length, arrays);
 }
 
+std::vector<int> GetIncludedFields(int64_t num_fields, int64_t 
is_partial_read) {
+  if (is_partial_read) {
+    std::vector<int> field_indices;
+    for (int i = 0; i < num_fields; i += 8) {
+      field_indices.push_back(i);
+    }
+    return field_indices;
+  } else {
+    return std::vector<int>();
+  }
+}
+
+int64_t BytesPerIteration(int64_t num_fields, int64_t is_partial_read, int64_t 
batch_size, int64_t num_batches) {
+  std::size_t num_actual_fields = GetIncludedFields(num_fields, 
is_partial_read).size();
+  double selectivity = num_actual_fields / static_cast<double>(num_fields);
+  if (num_actual_fields == 0) selectivity = 1;
+  auto bytes = batch_size * num_batches * selectivity;
+  return static_cast<int64_t>(bytes);
+}
+
 static void WriteRecordBatch(benchmark::State& state) {  // NOLINT non-const 
reference
   // 1MB
-  constexpr int64_t kTotalSize = 1 << 20;
+  constexpr int64_t kTotalSize = 1 << 23;

Review comment:
       Yes, fair point. Maybe the benchmark changes can be peeled off into 
their own PR first, so we have some time for the newer numbers to stabilize 
before we actually change any code.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to