mapleFU commented on code in PR #46532:
URL: https://github.com/apache/arrow/pull/46532#discussion_r2108452132


##########
cpp/src/parquet/arrow/reader_writer_benchmark.cc:
##########
@@ -454,28 +459,44 @@ BENCHMARK_TEMPLATE2(BM_ReadColumnPlain, true, 
Float16LogicalType)
 // Benchmark reading binary column
 //
 
-static void BM_ReadBinaryColumn(::benchmark::State& state) {
+static void BenchmarkReadBinaryColumn(::benchmark::State& state,
+                                      const 
std::shared_ptr<::arrow::DataType>& type) {
   std::shared_ptr<Table> table =
-      RandomStringTable(BENCHMARK_SIZE, state.range(1), state.range(0));
+      RandomStringTable(type, BENCHMARK_SIZE, state.range(1), state.range(0));
 
-  // Offsets + data
-  int64_t total_bytes = table->column(0)->chunk(0)->data()->buffers[1]->size() 
+
-                        table->column(0)->chunk(0)->data()->buffers[2]->size();
+  // Offsets / views + data
+  int64_t total_bytes = 0;
+  const ::arrow::ArrayData& column = *table->column(0)->chunk(0)->data();
+  for (size_t i = 1; i < column.buffers.size(); ++i) {
+    total_bytes += column.buffers[i]->size();
+  }
   BenchmarkReadTable(state, *table, table->num_rows(), total_bytes);
 }
 
-BENCHMARK(BM_ReadBinaryColumn)
-    ->ArgNames({"null_probability", "unique_values"})
-    // We vary unique values to trigger the dictionary-encoded (for 
low-cardinality)
-    // and plain (for high-cardinality) code paths.
-    ->Args({0, 32})
-    ->Args({0, kInfiniteUniqueValues})
-    ->Args({1, 32})
-    ->Args({50, 32})
-    ->Args({99, 32})
-    ->Args({1, kInfiniteUniqueValues})
-    ->Args({50, kInfiniteUniqueValues})
-    ->Args({99, kInfiniteUniqueValues});
+static void SetReadBinaryColumnArgs(benchmark::internal::Benchmark* b) {
+  b->ArgNames({"null_probability", "unique_values"})
+      // We vary unique values to trigger the dictionary-encoded (for 
low-cardinality)
+      // and plain (for high-cardinality) code paths.
+      ->Args({0, 32})
+      ->Args({0, kInfiniteUniqueValues})
+      ->Args({1, 32})
+      ->Args({50, 32})
+      ->Args({99, 32})
+      ->Args({1, kInfiniteUniqueValues})
+      ->Args({50, kInfiniteUniqueValues})
+      ->Args({99, kInfiniteUniqueValues});
+}
+
+static void BM_ReadBinaryColumn(::benchmark::State& state) {

Review Comment:
   Do we have benchmark result here?



##########
cpp/src/parquet/arrow/reader.cc:
##########
@@ -451,8 +452,17 @@ class LeafReader : public ColumnReaderImpl {
         field_(std::move(field)),
         input_(std::move(input)),
         descr_(input_->descr()) {
+    const auto type_id = field_->type()->id();
+    // If binary-like, RecordReader is able to read directly as the concrete 
type
+    // so as to avoid offset limitations.
+    std::shared_ptr<DataType> type_for_reading =

Review Comment:
   can const ref being used here ( which might be a little tricky )?
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to