[arrow] branch master updated: GH-15231: [C++][Benchmarking] Add new memory pool metrics and track in benchmarks (#33731)

wjones127 Mon, 13 Feb 2023 14:28:56 -0800

This is an automated email from the ASF dual-hosted git repository.

wjones127 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/master by this push:
     new ddfa8eed9b GH-15231: [C++][Benchmarking] Add new memory pool metrics 
and track in benchmarks (#33731)
ddfa8eed9b is described below

commit ddfa8eed9b188fcc7b38767d1858c2588c588f05
Author: Will Jones <[email protected]>
AuthorDate: Mon Feb 13 14:28:40 2023 -0800

    GH-15231: [C++][Benchmarking] Add new memory pool metrics and track in 
benchmarks (#33731)
    
    ### Rationale for this change
    
    We would like to track memory usage in many of our benchmarks. In order to 
isolate memory metrics per benchmark, we pass in a new `ProxyMemoryPool` to 
each benchmark, and collect the metrics from that at the conclusion of each 
benchmark.
    
    This only works for benchmarks where there are no allocations that bypass 
the passed pool and instead use the `default_memory_pool()`. Unfortunately most 
do right now, so this is only enabled for two benchmarks: builders and CSV 
converter.
    
    ### What changes are included in this PR?
    
     * `MemoryPool` now tracks both `num_allocations` and 
`total_bytes_allocated`.
     * There is now a class `BenchmarkMemoryTracker` which can be instantiated 
in benchmark files to get access to the `ProxyMemoryPool` that takes 
measurements and saves them to Google benchmark.
     * Wired up memory tracking to `arrow-csv-converter-benchmark` and 
`arrow-builder-benchmark`
    
    ### Are these changes tested?
    
    * [x] Add tests
    
    ### Are there any user-facing changes?
    
    * [x] Validate performance impact of new metrics in memory pools.
    
    No breaking changes.
    * Closes: #15231
    
    Authored-by: Will Jones <[email protected]>
    Signed-off-by: Will Jones <[email protected]>
---
 cpp/src/arrow/builder_benchmark.cc           | 23 +++++-----
 cpp/src/arrow/compute/exec/benchmark_util.cc | 25 ++++++-----
 cpp/src/arrow/compute/exec/benchmark_util.h  |  8 +++-
 cpp/src/arrow/csv/converter_benchmark.cc     |  9 +++-
 cpp/src/arrow/csv/test_common.cc             |  8 ++--
 cpp/src/arrow/csv/test_common.h              |  2 +-
 cpp/src/arrow/io/file_test.cc                |  4 +-
 cpp/src/arrow/memory_pool.cc                 | 32 +++++++++++++-
 cpp/src/arrow/memory_pool.h                  | 38 ++++++++++++++--
 cpp/src/arrow/memory_pool_test.cc            | 32 +++++++++++---
 cpp/src/arrow/stl_allocator.h                |  8 +++-
 cpp/src/arrow/util/benchmark_util.h          | 65 ++++++++++++++++++++++++++++
 cpp/thirdparty/versions.txt                  |  4 +-
 java/dataset/src/main/cpp/jni_util.cc        | 12 +++++
 java/dataset/src/main/cpp/jni_util.h         |  4 ++
 r/src/memorypool.cpp                         |  6 +++
 16 files changed, 235 insertions(+), 45 deletions(-)

diff --git a/cpp/src/arrow/builder_benchmark.cc 
b/cpp/src/arrow/builder_benchmark.cc
index cf3e7f32d5..e639a900cc 100644
--- a/cpp/src/arrow/builder_benchmark.cc
+++ b/cpp/src/arrow/builder_benchmark.cc
@@ -29,11 +29,14 @@
 #include "arrow/builder.h"
 #include "arrow/memory_pool.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/util/benchmark_util.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/decimal.h"
 
 namespace arrow {
 
+::arrow::BenchmarkMemoryTracker memory_tracker;
+
 using ValueType = int64_t;
 using VectorType = std::vector<ValueType>;
 
@@ -59,7 +62,7 @@ static std::string_view kBinaryView(kBinaryString);
 
 static void BuildIntArrayNoNulls(benchmark::State& state) {  // NOLINT 
non-const reference
   for (auto _ : state) {
-    Int64Builder builder;
+    Int64Builder builder(memory_tracker.memory_pool());
 
     for (int i = 0; i < kRounds; i++) {
       ABORT_NOT_OK(builder.AppendValues(kData.data(), kData.size(), nullptr));
@@ -75,7 +78,7 @@ static void BuildIntArrayNoNulls(benchmark::State& state) {  
// NOLINT non-const
 static void BuildAdaptiveIntNoNulls(
     benchmark::State& state) {  // NOLINT non-const reference
   for (auto _ : state) {
-    AdaptiveIntBuilder builder;
+    AdaptiveIntBuilder builder(memory_tracker.memory_pool());
 
     for (int i = 0; i < kRounds; i++) {
       ABORT_NOT_OK(builder.AppendValues(kData.data(), kData.size(), nullptr));
@@ -91,7 +94,7 @@ static void BuildAdaptiveIntNoNulls(
 static void BuildAdaptiveIntNoNullsScalarAppend(
     benchmark::State& state) {  // NOLINT non-const reference
   for (auto _ : state) {
-    AdaptiveIntBuilder builder;
+    AdaptiveIntBuilder builder(memory_tracker.memory_pool());
 
     for (int i = 0; i < kRounds; i++) {
       for (size_t j = 0; j < kData.size(); j++) {
@@ -113,7 +116,7 @@ static void BuildBooleanArrayNoNulls(
   const uint8_t* data = reinterpret_cast<const uint8_t*>(kData.data());
 
   for (auto _ : state) {
-    BooleanBuilder builder;
+    BooleanBuilder builder(memory_tracker.memory_pool());
 
     for (int i = 0; i < kRounds; i++) {
       ABORT_NOT_OK(builder.AppendValues(data, n_bytes));
@@ -128,7 +131,7 @@ static void BuildBooleanArrayNoNulls(
 
 static void BuildBinaryArray(benchmark::State& state) {  // NOLINT non-const 
reference
   for (auto _ : state) {
-    BinaryBuilder builder;
+    BinaryBuilder builder(memory_tracker.memory_pool());
 
     for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) {
       ABORT_NOT_OK(builder.Append(kBinaryView));
@@ -147,7 +150,7 @@ static void BuildChunkedBinaryArray(
   const int32_t kChunkSize = 1 << 20;
 
   for (auto _ : state) {
-    internal::ChunkedBinaryBuilder builder(kChunkSize);
+    internal::ChunkedBinaryBuilder builder(kChunkSize, 
memory_tracker.memory_pool());
 
     for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) {
       ABORT_NOT_OK(builder.Append(kBinaryView));
@@ -165,7 +168,7 @@ static void BuildFixedSizeBinaryArray(
   auto type = fixed_size_binary(static_cast<int32_t>(kBinaryView.size()));
 
   for (auto _ : state) {
-    FixedSizeBinaryBuilder builder(type);
+    FixedSizeBinaryBuilder builder(type, memory_tracker.memory_pool());
 
     for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) {
       ABORT_NOT_OK(builder.Append(kBinaryView));
@@ -185,7 +188,7 @@ static void BuildDecimalArray(benchmark::State& state) {  
// NOLINT non-const re
   int32_t scale = 0;
   ABORT_NOT_OK(Decimal128::FromString("1234.1234", &value, &precision, 
&scale));
   for (auto _ : state) {
-    Decimal128Builder builder(type);
+    Decimal128Builder builder(type, memory_tracker.memory_pool());
 
     for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) {
       ABORT_NOT_OK(builder.Append(value));
@@ -298,7 +301,7 @@ static void BenchmarkDictionaryArray(
     benchmark::State& state,  // NOLINT non-const reference
     const std::vector<Scalar>& fodder, size_t fodder_nbytes = 0) {
   for (auto _ : state) {
-    DictionaryBuilderType builder(default_memory_pool());
+    DictionaryBuilderType builder(memory_tracker.memory_pool());
 
     for (int64_t i = 0; i < kRounds; i++) {
       for (const auto& value : fodder) {
@@ -371,7 +374,7 @@ static void BenchmarkBufferBuilder(
   // Write approx. 256 MB to BufferBuilder
   int64_t num_raw_values = (1 << 28) / raw_nbytes;
   for (auto _ : state) {
-    BufferBuilder builder;
+    BufferBuilder builder(memory_tracker.memory_pool());
     std::shared_ptr<Buffer> buf;
     for (int64_t i = 0; i < num_raw_values; ++i) {
       ABORT_NOT_OK(builder.Append(raw_data, raw_nbytes));
diff --git a/cpp/src/arrow/compute/exec/benchmark_util.cc 
b/cpp/src/arrow/compute/exec/benchmark_util.cc
index a3cd86d26d..17ce54e7b2 100644
--- a/cpp/src/arrow/compute/exec/benchmark_util.cc
+++ b/cpp/src/arrow/compute/exec/benchmark_util.cc
@@ -34,18 +34,18 @@ namespace compute {
 // an isolated node. We do this by passing in batches through a task 
scheduler, and
 // calling InputFinished and InputReceived.
 
-Status BenchmarkIsolatedNodeOverhead(benchmark::State& state,
-                                     arrow::compute::Expression expr, int32_t 
num_batches,
-                                     int32_t batch_size,
-                                     arrow::compute::BatchesWithSchema data,
-                                     std::string factory_name,
-                                     arrow::compute::ExecNodeOptions& options) 
{
+Status BenchmarkIsolatedNodeOverhead(
+    benchmark::State& state, arrow::compute::Expression expr, int32_t 
num_batches,
+    int32_t batch_size, arrow::compute::BatchesWithSchema data, std::string 
factory_name,
+    arrow::compute::ExecNodeOptions& options, arrow::MemoryPool* pool) {
   for (auto _ : state) {
     state.PauseTiming();
     AsyncGenerator<std::optional<arrow::compute::ExecBatch>> sink_gen;
 
+    ExecContext ctx(pool, ::arrow::internal::GetCpuThreadPool());
+
     ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::compute::ExecPlan> plan,
-                          arrow::compute::ExecPlan::Make());
+                          arrow::compute::ExecPlan::Make(ctx));
     // Source and sink nodes have no effect on the benchmark.
     // Used for dummy purposes as they are referenced in InputReceived and 
InputFinished.
     ARROW_ASSIGN_OR_RAISE(arrow::compute::ExecNode * source_node,
@@ -108,14 +108,15 @@ Status BenchmarkIsolatedNodeOverhead(benchmark::State& 
state,
 // Generates batches from data, then benchmark rows_per_second and 
batches_per_second for
 // a source -> node_declarations -> sink sequence.
 
-Status BenchmarkNodeOverhead(
-    benchmark::State& state, int32_t num_batches, int32_t batch_size,
-    arrow::compute::BatchesWithSchema data,
-    std::vector<arrow::compute::Declaration>& node_declarations) {
+Status BenchmarkNodeOverhead(benchmark::State& state, int32_t num_batches,
+                             int32_t batch_size, 
arrow::compute::BatchesWithSchema data,
+                             std::vector<arrow::compute::Declaration>& 
node_declarations,
+                             MemoryPool* pool) {
+  ExecContext ctx(pool, ::arrow::internal::GetCpuThreadPool());
   for (auto _ : state) {
     state.PauseTiming();
     ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::compute::ExecPlan> plan,
-                          arrow::compute::ExecPlan::Make());
+                          arrow::compute::ExecPlan::Make(ctx));
     AsyncGenerator<std::optional<arrow::compute::ExecBatch>> sink_gen;
     arrow::compute::Declaration source = arrow::compute::Declaration(
         {"source",
diff --git a/cpp/src/arrow/compute/exec/benchmark_util.h 
b/cpp/src/arrow/compute/exec/benchmark_util.h
index c66c2e91db..9965b5915d 100644
--- a/cpp/src/arrow/compute/exec/benchmark_util.h
+++ b/cpp/src/arrow/compute/exec/benchmark_util.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#pragma once
+
 #include <cstdint>
 #include <string>
 #include <vector>
@@ -31,14 +33,16 @@ namespace compute {
 
 Status BenchmarkNodeOverhead(benchmark::State& state, int32_t num_batches,
                              int32_t batch_size, 
arrow::compute::BatchesWithSchema data,
-                             std::vector<arrow::compute::Declaration>& 
node_declarations);
+                             std::vector<arrow::compute::Declaration>& 
node_declarations,
+                             arrow::MemoryPool* pool = default_memory_pool());
 
 Status BenchmarkIsolatedNodeOverhead(benchmark::State& state,
                                      arrow::compute::Expression expr, int32_t 
num_batches,
                                      int32_t batch_size,
                                      arrow::compute::BatchesWithSchema data,
                                      std::string factory_name,
-                                     arrow::compute::ExecNodeOptions& options);
+                                     arrow::compute::ExecNodeOptions& options,
+                                     arrow::MemoryPool* pool = 
default_memory_pool());
 
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/csv/converter_benchmark.cc 
b/cpp/src/arrow/csv/converter_benchmark.cc
index b55c11e4db..8cd771a7da 100644
--- a/cpp/src/arrow/csv/converter_benchmark.cc
+++ b/cpp/src/arrow/csv/converter_benchmark.cc
@@ -29,11 +29,14 @@
 #include "arrow/csv/test_common.h"
 #include "arrow/io/memory.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/util/benchmark_util.h"
 #include "arrow/util/value_parsing.h"
 
 namespace arrow {
 namespace csv {
 
+::arrow::BenchmarkMemoryTracker memory_tracker;
+
 static std::shared_ptr<BlockParser> BuildFromExamples(
     const std::vector<std::string>& base_rows, int32_t num_rows) {
   std::vector<std::string> rows;
@@ -42,7 +45,8 @@ static std::shared_ptr<BlockParser> BuildFromExamples(
   }
 
   std::shared_ptr<BlockParser> result;
-  MakeCSVParser(rows, &result);
+  MakeCSVParser(rows, ParseOptions::Defaults(), -1, 
memory_tracker.memory_pool(),
+                &result);
   return result;
 }
 
@@ -84,7 +88,8 @@ static void BenchmarkConversion(benchmark::State& state,  // 
NOLINT non-const re
                                 BlockParser& parser,
                                 const std::shared_ptr<DataType>& type,
                                 ConvertOptions options) {
-  std::shared_ptr<Converter> converter = *Converter::Make(type, options);
+  std::shared_ptr<Converter> converter =
+      *Converter::Make(type, options, memory_tracker.memory_pool());
 
   while (state.KeepRunning()) {
     auto converted = *converter->Convert(parser, 0 /* col_index */);
diff --git a/cpp/src/arrow/csv/test_common.cc b/cpp/src/arrow/csv/test_common.cc
index 648ad18e3c..eeefa10f58 100644
--- a/cpp/src/arrow/csv/test_common.cc
+++ b/cpp/src/arrow/csv/test_common.cc
@@ -31,9 +31,9 @@ std::string MakeCSVData(std::vector<std::string> lines) {
 }
 
 void MakeCSVParser(std::vector<std::string> lines, ParseOptions options, 
int32_t num_cols,
-                   std::shared_ptr<BlockParser>* out) {
+                   MemoryPool* pool, std::shared_ptr<BlockParser>* out) {
   auto csv = MakeCSVData(lines);
-  auto parser = std::make_shared<BlockParser>(options, num_cols);
+  auto parser = std::make_shared<BlockParser>(pool, options, num_cols);
   uint32_t out_size;
   ASSERT_OK(parser->Parse(std::string_view(csv), &out_size));
   ASSERT_EQ(out_size, csv.size()) << "trailing CSV data not parsed";
@@ -42,7 +42,7 @@ void MakeCSVParser(std::vector<std::string> lines, 
ParseOptions options, int32_t
 
 void MakeCSVParser(std::vector<std::string> lines, ParseOptions options,
                    std::shared_ptr<BlockParser>* out) {
-  return MakeCSVParser(lines, options, -1, out);
+  return MakeCSVParser(lines, options, -1, default_memory_pool(), out);
 }
 
 void MakeCSVParser(std::vector<std::string> lines, 
std::shared_ptr<BlockParser>* out) {
@@ -57,7 +57,7 @@ void MakeColumnParser(std::vector<std::string> items, 
std::shared_ptr<BlockParse
   for (const auto& item : items) {
     lines.push_back(item + '\n');
   }
-  MakeCSVParser(lines, options, 1, out);
+  MakeCSVParser(lines, options, 1, default_memory_pool(), out);
   ASSERT_EQ((*out)->num_cols(), 1) << "Should have seen only 1 CSV column";
   ASSERT_EQ((*out)->num_rows(), items.size());
 }
diff --git a/cpp/src/arrow/csv/test_common.h b/cpp/src/arrow/csv/test_common.h
index 810a0b4721..07a4160447 100644
--- a/cpp/src/arrow/csv/test_common.h
+++ b/cpp/src/arrow/csv/test_common.h
@@ -34,7 +34,7 @@ std::string MakeCSVData(std::vector<std::string> lines);
 // Make a BlockParser from a vector of lines representing a CSV file
 ARROW_TESTING_EXPORT
 void MakeCSVParser(std::vector<std::string> lines, ParseOptions options, 
int32_t num_cols,
-                   std::shared_ptr<BlockParser>* out);
+                   MemoryPool* pool, std::shared_ptr<BlockParser>* out);
 
 ARROW_TESTING_EXPORT
 void MakeCSVParser(std::vector<std::string> lines, ParseOptions options,
diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc
index ba8ecd54b5..088e641de3 100644
--- a/cpp/src/arrow/io/file_test.cc
+++ b/cpp/src/arrow/io/file_test.cc
@@ -462,9 +462,11 @@ class MyMemoryPool : public MemoryPool {
 
   int64_t bytes_allocated() const override { return -1; }
 
+  int64_t total_bytes_allocated() const override { return -1; }
+
   std::string backend_name() const override { return "my"; }
 
-  int64_t num_allocations() const { return num_allocations_.load(); }
+  int64_t num_allocations() const override { return num_allocations_.load(); }
 
  private:
   std::atomic<int64_t> num_allocations_;
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index c87fdc6884..843329c17b 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -509,7 +509,7 @@ class BaseMemoryPoolImpl : public MemoryPool {
 #endif
     Allocator::DeallocateAligned(buffer, size, alignment);
 
-    stats_.UpdateAllocatedBytes(-size);
+    stats_.UpdateAllocatedBytes(-size, /*is_free*/ true);
   }
 
   void ReleaseUnused() override { Allocator::ReleaseUnused(); }
@@ -518,6 +518,12 @@ class BaseMemoryPoolImpl : public MemoryPool {
 
   int64_t max_memory() const override { return stats_.max_memory(); }
 
+  int64_t total_bytes_allocated() const override {
+    return stats_.total_bytes_allocated();
+  }
+
+  int64_t num_allocations() const override { return stats_.num_allocations(); }
+
  protected:
   internal::MemoryPoolStats stats_;
 };
@@ -732,6 +738,18 @@ int64_t LoggingMemoryPool::max_memory() const {
   return mem;
 }
 
+int64_t LoggingMemoryPool::total_bytes_allocated() const {
+  int64_t mem = pool_->total_bytes_allocated();
+  std::cout << "total_bytes_allocated: " << mem << std::endl;
+  return mem;
+}
+
+int64_t LoggingMemoryPool::num_allocations() const {
+  int64_t mem = pool_->num_allocations();
+  std::cout << "num_allocations: " << mem << std::endl;
+  return mem;
+}
+
 std::string LoggingMemoryPool::backend_name() const { return 
pool_->backend_name(); }
 
 ///////////////////////////////////////////////////////////////////////
@@ -756,13 +774,17 @@ class ProxyMemoryPool::ProxyMemoryPoolImpl {
 
   void Free(uint8_t* buffer, int64_t size, int64_t alignment) {
     pool_->Free(buffer, size, alignment);
-    stats_.UpdateAllocatedBytes(-size);
+    stats_.UpdateAllocatedBytes(-size, /*is_free=*/true);
   }
 
   int64_t bytes_allocated() const { return stats_.bytes_allocated(); }
 
   int64_t max_memory() const { return stats_.max_memory(); }
 
+  int64_t total_bytes_allocated() const { return 
stats_.total_bytes_allocated(); }
+
+  int64_t num_allocations() const { return stats_.num_allocations(); }
+
   std::string backend_name() const { return pool_->backend_name(); }
 
  private:
@@ -793,6 +815,12 @@ int64_t ProxyMemoryPool::bytes_allocated() const { return 
impl_->bytes_allocated
 
 int64_t ProxyMemoryPool::max_memory() const { return impl_->max_memory(); }
 
+int64_t ProxyMemoryPool::total_bytes_allocated() const {
+  return impl_->total_bytes_allocated();
+}
+
+int64_t ProxyMemoryPool::num_allocations() const { return 
impl_->num_allocations(); }
+
 std::string ProxyMemoryPool::backend_name() const { return 
impl_->backend_name(); }
 
 std::vector<std::string> SupportedMemoryBackendNames() {
diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h
index 4672dfb338..712a828041 100644
--- a/cpp/src/arrow/memory_pool.h
+++ b/cpp/src/arrow/memory_pool.h
@@ -43,18 +43,36 @@ class MemoryPoolStats {
 
   int64_t bytes_allocated() const { return bytes_allocated_.load(); }
 
-  inline void UpdateAllocatedBytes(int64_t diff) {
+  int64_t total_bytes_allocated() const { return 
total_allocated_bytes_.load(); }
+
+  int64_t num_allocations() const { return num_allocs_.load(); }
+
+  inline void UpdateAllocatedBytes(int64_t diff, bool is_free = false) {
     auto allocated = bytes_allocated_.fetch_add(diff) + diff;
     // "maximum" allocated memory is ill-defined in multi-threaded code,
     // so don't try to be too rigorous here
     if (diff > 0 && allocated > max_memory_) {
       max_memory_ = allocated;
     }
+
+    // Reallocations might just expand/contract the allocation in place or 
might
+    // copy to a new location. We can't really know, so we just represent the
+    // optimistic case.
+    if (diff > 0) {
+      total_allocated_bytes_ += diff;
+    }
+
+    // We count any reallocation as a allocation.
+    if (!is_free) {
+      num_allocs_ += 1;
+    }
   }
 
  protected:
-  std::atomic<int64_t> bytes_allocated_;
-  std::atomic<int64_t> max_memory_;
+  std::atomic<int64_t> bytes_allocated_ = 0;
+  std::atomic<int64_t> max_memory_ = 0;
+  std::atomic<int64_t> total_allocated_bytes_ = 0;
+  std::atomic<int64_t> num_allocs_ = 0;
 };
 
 }  // namespace internal
@@ -119,6 +137,12 @@ class ARROW_EXPORT MemoryPool {
   /// returns -1
   virtual int64_t max_memory() const;
 
+  /// The number of bytes that were allocated.
+  virtual int64_t total_bytes_allocated() const = 0;
+
+  /// The number of allocations or reallocations that were requested.
+  virtual int64_t num_allocations() const = 0;
+
   /// The name of the backend used by this MemoryPool (e.g. "system" or 
"jemalloc").
   virtual std::string backend_name() const = 0;
 
@@ -144,6 +168,10 @@ class ARROW_EXPORT LoggingMemoryPool : public MemoryPool {
 
   int64_t max_memory() const override;
 
+  int64_t total_bytes_allocated() const override;
+
+  int64_t num_allocations() const override;
+
   std::string backend_name() const override;
 
  private:
@@ -172,6 +200,10 @@ class ARROW_EXPORT ProxyMemoryPool : public MemoryPool {
 
   int64_t max_memory() const override;
 
+  int64_t total_bytes_allocated() const override;
+
+  int64_t num_allocations() const override;
+
   std::string backend_name() const override;
 
  private:
diff --git a/cpp/src/arrow/memory_pool_test.cc 
b/cpp/src/arrow/memory_pool_test.cc
index a227226545..81d9d69ba3 100644
--- a/cpp/src/arrow/memory_pool_test.cc
+++ b/cpp/src/arrow/memory_pool_test.cc
@@ -110,7 +110,8 @@ TEST(DefaultMemoryPool, Identity) {
 // googletest documentation
 #if !(defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER))
 
-TEST(DefaultMemoryPoolDeathTest, MaxMemory) {
+// TODO: is this still a death test?
+TEST(DefaultMemoryPoolDeathTest, Statistics) {
   MemoryPool* pool = default_memory_pool();
   uint8_t* data1;
   uint8_t* data2;
@@ -118,11 +119,32 @@ TEST(DefaultMemoryPoolDeathTest, MaxMemory) {
   ASSERT_OK(pool->Allocate(100, &data1));
   ASSERT_OK(pool->Allocate(50, &data2));
   pool->Free(data2, 50);
-  ASSERT_OK(pool->Allocate(100, &data2));
-  pool->Free(data1, 100);
-  pool->Free(data2, 100);
 
-  ASSERT_EQ(200, pool->max_memory());
+  ASSERT_EQ(150, pool->max_memory());
+  ASSERT_EQ(150, pool->total_bytes_allocated());
+  ASSERT_EQ(100, pool->bytes_allocated());
+  ASSERT_EQ(2, pool->num_allocations());
+
+  ASSERT_OK(pool->Reallocate(100, 150, &data1));  // Grow data1
+
+  ASSERT_EQ(150, pool->max_memory());
+  ASSERT_EQ(150 + 50, pool->total_bytes_allocated());
+  ASSERT_EQ(150, pool->bytes_allocated());
+  ASSERT_EQ(3, pool->num_allocations());
+
+  ASSERT_OK(pool->Reallocate(150, 50, &data1));  // Shrink data1
+
+  ASSERT_EQ(150, pool->max_memory());
+  ASSERT_EQ(200, pool->total_bytes_allocated());
+  ASSERT_EQ(50, pool->bytes_allocated());
+  ASSERT_EQ(4, pool->num_allocations());
+
+  pool->Free(data1, 50);
+
+  ASSERT_EQ(150, pool->max_memory());
+  ASSERT_EQ(200, pool->total_bytes_allocated());
+  ASSERT_EQ(0, pool->bytes_allocated());
+  ASSERT_EQ(4, pool->num_allocations());
 }
 
 #endif  // ARROW_VALGRIND
diff --git a/cpp/src/arrow/stl_allocator.h b/cpp/src/arrow/stl_allocator.h
index 9d921d4344..a1f4ae9feb 100644
--- a/cpp/src/arrow/stl_allocator.h
+++ b/cpp/src/arrow/stl_allocator.h
@@ -130,13 +130,19 @@ class STLMemoryPool : public MemoryPool {
 
   void Free(uint8_t* buffer, int64_t size, int64_t /*alignment*/) override {
     alloc_.deallocate(buffer, size);
-    stats_.UpdateAllocatedBytes(-size);
+    stats_.UpdateAllocatedBytes(-size, /*is_free=*/true);
   }
 
   int64_t bytes_allocated() const override { return stats_.bytes_allocated(); }
 
   int64_t max_memory() const override { return stats_.max_memory(); }
 
+  int64_t total_bytes_allocated() const override {
+    return stats_.total_bytes_allocated();
+  }
+
+  int64_t num_allocations() const override { return stats_.num_allocations(); }
+
   std::string backend_name() const override { return "stl"; }
 
  private:
diff --git a/cpp/src/arrow/util/benchmark_util.h 
b/cpp/src/arrow/util/benchmark_util.h
index 5a5f51df5a..19abb7e1b3 100644
--- a/cpp/src/arrow/util/benchmark_util.h
+++ b/cpp/src/arrow/util/benchmark_util.h
@@ -21,7 +21,10 @@
 
 #include "benchmark/benchmark.h"
 
+#include "arrow/memory_pool.h"
+#include "arrow/type_fwd.h"
 #include "arrow/util/cpu_info.h"
+#include "arrow/util/logging.h"  // IWYU pragma: keep
 
 namespace arrow {
 
@@ -136,4 +139,66 @@ struct RegressionArgs {
   bool size_is_bytes_;
 };
 
+class MemoryPoolMemoryManager : public benchmark::MemoryManager {
+  void Start() override {
+    memory_pool = std::make_shared<ProxyMemoryPool>(default_memory_pool());
+
+    MemoryPool* default_pool = default_memory_pool();
+    global_allocations_start = default_pool->num_allocations();
+  }
+
+  void Stop(benchmark::MemoryManager::Result* result) override {
+    // If num_allocations is still zero, we assume that the memory pool wasn't 
passed down
+    // so we should record them.
+    MemoryPool* default_pool = default_memory_pool();
+    int64_t new_default_allocations =
+        default_pool->num_allocations() - global_allocations_start;
+
+    // Only record metrics metrics if (1) there were allocations and (2) we
+    // recorded at least one.
+    if (new_default_allocations > 0 && memory_pool->num_allocations() > 0) {
+      if (new_default_allocations > memory_pool->num_allocations()) {
+        // If we missed some, let's report that.
+        int64_t missed_allocations =
+            new_default_allocations - memory_pool->num_allocations();
+        ARROW_LOG(WARNING) << "BenchmarkMemoryTracker recorded some 
allocations "
+                           << "for a benchmark, but missed " << 
missed_allocations
+                           << " allocations.\n";
+      }
+
+      result->max_bytes_used = memory_pool->max_memory();
+      result->total_allocated_bytes = memory_pool->total_bytes_allocated();
+      result->num_allocs = memory_pool->num_allocations();
+    }
+  }
+
+ public:
+  std::shared_ptr<::arrow::ProxyMemoryPool> memory_pool;
+
+ protected:
+  int64_t global_allocations_start;
+};
+
+/// \brief Track memory pool allocations in benchmarks.
+///
+/// Instantiate as a global variable to register the hooks into Google 
Benchmark
+/// to collect memory metrics. Before each benchmark, a new ProxyMemoryPool is
+/// created. It can then be accessed with memory_pool(). Once the benchmark is
+/// complete, the hook will record the maximum memory used, the total bytes
+/// allocated, and the total number of allocations. If no allocations were 
seen,
+/// (for example, if you forgot to pass down the memory pool), then these 
metrics
+/// will not be saved.
+///
+/// Since this is used as one global variable, this will not work if multiple
+/// benchmarks are run concurrently or for multi-threaded benchmarks (ones
+/// that use `->ThreadRange(...)`).
+class BenchmarkMemoryTracker {
+ public:
+  BenchmarkMemoryTracker() : manager_() { 
::benchmark::RegisterMemoryManager(&manager_); }
+  ::arrow::MemoryPool* memory_pool() const { return 
manager_.memory_pool.get(); }
+
+ protected:
+  ::arrow::MemoryPoolMemoryManager manager_;
+};
+
 }  // namespace arrow
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 56b2000c84..79e181c957 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -61,8 +61,8 @@ ARROW_CARES_BUILD_VERSION=1.17.2
 
ARROW_CARES_BUILD_SHA256_CHECKSUM=4803c844ce20ce510ef0eb83f8ea41fa24ecaae9d280c468c582d2bb25b3913d
 ARROW_CRC32C_BUILD_VERSION=1.1.2
 
ARROW_CRC32C_BUILD_SHA256_CHECKSUM=ac07840513072b7fcebda6e821068aa04889018f24e10e46181068fb214d7e56
-ARROW_GBENCHMARK_BUILD_VERSION=v1.6.0
-ARROW_GBENCHMARK_BUILD_SHA256_CHECKSUM=1f71c72ce08d2c1310011ea6436b31e39ccab8c2db94186d26657d41747c85d6
+ARROW_GBENCHMARK_BUILD_VERSION=v1.7.1
+ARROW_GBENCHMARK_BUILD_SHA256_CHECKSUM=6430e4092653380d9dc4ccb45a1e2dc9259d581f4866dc0759713126056bc1d7
 ARROW_GFLAGS_BUILD_VERSION=v2.2.2
 
ARROW_GFLAGS_BUILD_SHA256_CHECKSUM=34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf
 ARROW_GLOG_BUILD_VERSION=v0.5.0
diff --git a/java/dataset/src/main/cpp/jni_util.cc 
b/java/dataset/src/main/cpp/jni_util.cc
index 0aba2e5121..f2f6871973 100644
--- a/java/dataset/src/main/cpp/jni_util.cc
+++ b/java/dataset/src/main/cpp/jni_util.cc
@@ -115,6 +115,10 @@ class ReservationListenableMemoryPool::Impl {
 
   int64_t max_memory() { return stats_.max_memory(); }
 
+  int64_t total_bytes_allocated() { return stats_.total_bytes_allocated(); }
+
+  int64_t num_allocations() { return stats_.num_allocations(); }
+
   std::string backend_name() { return pool_->backend_name(); }
 
   std::shared_ptr<ReservationListener> get_listener() { return listener_; }
@@ -158,6 +162,14 @@ int64_t ReservationListenableMemoryPool::max_memory() 
const {
   return impl_->max_memory();
 }
 
+int64_t ReservationListenableMemoryPool::total_bytes_allocated() const {
+  return impl_->total_bytes_allocated();
+}
+
+int64_t ReservationListenableMemoryPool::num_allocations() const {
+  return impl_->num_allocations();
+}
+
 std::string ReservationListenableMemoryPool::backend_name() const {
   return impl_->backend_name();
 }
diff --git a/java/dataset/src/main/cpp/jni_util.h 
b/java/dataset/src/main/cpp/jni_util.h
index 5697a82c8d..20482a6c54 100644
--- a/java/dataset/src/main/cpp/jni_util.h
+++ b/java/dataset/src/main/cpp/jni_util.h
@@ -158,6 +158,10 @@ class ReservationListenableMemoryPool : public 
arrow::MemoryPool {
 
   int64_t max_memory() const override;
 
+  int64_t total_bytes_allocated() const override;
+
+  int64_t num_allocations() const override;
+
   std::string backend_name() const override;
 
   std::shared_ptr<ReservationListener> get_listener();
diff --git a/r/src/memorypool.cpp b/r/src/memorypool.cpp
index 7c3deec98f..027aa8ef2a 100644
--- a/r/src/memorypool.cpp
+++ b/r/src/memorypool.cpp
@@ -45,6 +45,12 @@ class GcMemoryPool : public arrow::MemoryPool {
 
   int64_t max_memory() const override { return pool_->max_memory(); }
 
+  int64_t total_bytes_allocated() const override {
+    return pool_->total_bytes_allocated();
+  }
+
+  int64_t num_allocations() const override { return pool_->num_allocations(); }
+
   std::string backend_name() const override { return pool_->backend_name(); }
 
  private:

[arrow] branch master updated: GH-15231: [C++][Benchmarking] Add new memory pool metrics and track in benchmarks (#33731)

Reply via email to