This is an automated email from the ASF dual-hosted git repository.
wjones127 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ddfa8eed9b GH-15231: [C++][Benchmarking] Add new memory pool metrics
and track in benchmarks (#33731)
ddfa8eed9b is described below
commit ddfa8eed9b188fcc7b38767d1858c2588c588f05
Author: Will Jones <[email protected]>
AuthorDate: Mon Feb 13 14:28:40 2023 -0800
GH-15231: [C++][Benchmarking] Add new memory pool metrics and track in
benchmarks (#33731)
### Rationale for this change
We would like to track memory usage in many of our benchmarks. In order to
isolate memory metrics per benchmark, we pass in a new `ProxyMemoryPool` to
each benchmark, and collect the metrics from that at the conclusion of each
benchmark.
This only works for benchmarks where there are no allocations that bypass
the passed pool and instead use the `default_memory_pool()`. Unfortunately most
do right now, so this is only enabled for two benchmarks: builders and CSV
converter.
### What changes are included in this PR?
* `MemoryPool` now tracks both `num_allocations` and
`total_bytes_allocated`.
* There is now a class `BenchmarkMemoryTracker` which can be instantiated
in benchmark files to get access to the `ProxyMemoryPool` that takes
measurements and saves them to Google benchmark.
* Wired up memory tracking to `arrow-csv-converter-benchmark` and
`arrow-builder-benchmark`
### Are these changes tested?
* [x] Add tests
### Are there any user-facing changes?
* [x] Validate performance impact of new metrics in memory pools.
No breaking changes.
* Closes: #15231
Authored-by: Will Jones <[email protected]>
Signed-off-by: Will Jones <[email protected]>
---
cpp/src/arrow/builder_benchmark.cc | 23 +++++-----
cpp/src/arrow/compute/exec/benchmark_util.cc | 25 ++++++-----
cpp/src/arrow/compute/exec/benchmark_util.h | 8 +++-
cpp/src/arrow/csv/converter_benchmark.cc | 9 +++-
cpp/src/arrow/csv/test_common.cc | 8 ++--
cpp/src/arrow/csv/test_common.h | 2 +-
cpp/src/arrow/io/file_test.cc | 4 +-
cpp/src/arrow/memory_pool.cc | 32 +++++++++++++-
cpp/src/arrow/memory_pool.h | 38 ++++++++++++++--
cpp/src/arrow/memory_pool_test.cc | 32 +++++++++++---
cpp/src/arrow/stl_allocator.h | 8 +++-
cpp/src/arrow/util/benchmark_util.h | 65 ++++++++++++++++++++++++++++
cpp/thirdparty/versions.txt | 4 +-
java/dataset/src/main/cpp/jni_util.cc | 12 +++++
java/dataset/src/main/cpp/jni_util.h | 4 ++
r/src/memorypool.cpp | 6 +++
16 files changed, 235 insertions(+), 45 deletions(-)
diff --git a/cpp/src/arrow/builder_benchmark.cc
b/cpp/src/arrow/builder_benchmark.cc
index cf3e7f32d5..e639a900cc 100644
--- a/cpp/src/arrow/builder_benchmark.cc
+++ b/cpp/src/arrow/builder_benchmark.cc
@@ -29,11 +29,14 @@
#include "arrow/builder.h"
#include "arrow/memory_pool.h"
#include "arrow/testing/gtest_util.h"
+#include "arrow/util/benchmark_util.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/decimal.h"
namespace arrow {
+::arrow::BenchmarkMemoryTracker memory_tracker;
+
using ValueType = int64_t;
using VectorType = std::vector<ValueType>;
@@ -59,7 +62,7 @@ static std::string_view kBinaryView(kBinaryString);
static void BuildIntArrayNoNulls(benchmark::State& state) { // NOLINT
non-const reference
for (auto _ : state) {
- Int64Builder builder;
+ Int64Builder builder(memory_tracker.memory_pool());
for (int i = 0; i < kRounds; i++) {
ABORT_NOT_OK(builder.AppendValues(kData.data(), kData.size(), nullptr));
@@ -75,7 +78,7 @@ static void BuildIntArrayNoNulls(benchmark::State& state) {
// NOLINT non-const
static void BuildAdaptiveIntNoNulls(
benchmark::State& state) { // NOLINT non-const reference
for (auto _ : state) {
- AdaptiveIntBuilder builder;
+ AdaptiveIntBuilder builder(memory_tracker.memory_pool());
for (int i = 0; i < kRounds; i++) {
ABORT_NOT_OK(builder.AppendValues(kData.data(), kData.size(), nullptr));
@@ -91,7 +94,7 @@ static void BuildAdaptiveIntNoNulls(
static void BuildAdaptiveIntNoNullsScalarAppend(
benchmark::State& state) { // NOLINT non-const reference
for (auto _ : state) {
- AdaptiveIntBuilder builder;
+ AdaptiveIntBuilder builder(memory_tracker.memory_pool());
for (int i = 0; i < kRounds; i++) {
for (size_t j = 0; j < kData.size(); j++) {
@@ -113,7 +116,7 @@ static void BuildBooleanArrayNoNulls(
const uint8_t* data = reinterpret_cast<const uint8_t*>(kData.data());
for (auto _ : state) {
- BooleanBuilder builder;
+ BooleanBuilder builder(memory_tracker.memory_pool());
for (int i = 0; i < kRounds; i++) {
ABORT_NOT_OK(builder.AppendValues(data, n_bytes));
@@ -128,7 +131,7 @@ static void BuildBooleanArrayNoNulls(
static void BuildBinaryArray(benchmark::State& state) { // NOLINT non-const
reference
for (auto _ : state) {
- BinaryBuilder builder;
+ BinaryBuilder builder(memory_tracker.memory_pool());
for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) {
ABORT_NOT_OK(builder.Append(kBinaryView));
@@ -147,7 +150,7 @@ static void BuildChunkedBinaryArray(
const int32_t kChunkSize = 1 << 20;
for (auto _ : state) {
- internal::ChunkedBinaryBuilder builder(kChunkSize);
+ internal::ChunkedBinaryBuilder builder(kChunkSize,
memory_tracker.memory_pool());
for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) {
ABORT_NOT_OK(builder.Append(kBinaryView));
@@ -165,7 +168,7 @@ static void BuildFixedSizeBinaryArray(
auto type = fixed_size_binary(static_cast<int32_t>(kBinaryView.size()));
for (auto _ : state) {
- FixedSizeBinaryBuilder builder(type);
+ FixedSizeBinaryBuilder builder(type, memory_tracker.memory_pool());
for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) {
ABORT_NOT_OK(builder.Append(kBinaryView));
@@ -185,7 +188,7 @@ static void BuildDecimalArray(benchmark::State& state) {
// NOLINT non-const re
int32_t scale = 0;
ABORT_NOT_OK(Decimal128::FromString("1234.1234", &value, &precision,
&scale));
for (auto _ : state) {
- Decimal128Builder builder(type);
+ Decimal128Builder builder(type, memory_tracker.memory_pool());
for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) {
ABORT_NOT_OK(builder.Append(value));
@@ -298,7 +301,7 @@ static void BenchmarkDictionaryArray(
benchmark::State& state, // NOLINT non-const reference
const std::vector<Scalar>& fodder, size_t fodder_nbytes = 0) {
for (auto _ : state) {
- DictionaryBuilderType builder(default_memory_pool());
+ DictionaryBuilderType builder(memory_tracker.memory_pool());
for (int64_t i = 0; i < kRounds; i++) {
for (const auto& value : fodder) {
@@ -371,7 +374,7 @@ static void BenchmarkBufferBuilder(
// Write approx. 256 MB to BufferBuilder
int64_t num_raw_values = (1 << 28) / raw_nbytes;
for (auto _ : state) {
- BufferBuilder builder;
+ BufferBuilder builder(memory_tracker.memory_pool());
std::shared_ptr<Buffer> buf;
for (int64_t i = 0; i < num_raw_values; ++i) {
ABORT_NOT_OK(builder.Append(raw_data, raw_nbytes));
diff --git a/cpp/src/arrow/compute/exec/benchmark_util.cc
b/cpp/src/arrow/compute/exec/benchmark_util.cc
index a3cd86d26d..17ce54e7b2 100644
--- a/cpp/src/arrow/compute/exec/benchmark_util.cc
+++ b/cpp/src/arrow/compute/exec/benchmark_util.cc
@@ -34,18 +34,18 @@ namespace compute {
// an isolated node. We do this by passing in batches through a task
scheduler, and
// calling InputFinished and InputReceived.
-Status BenchmarkIsolatedNodeOverhead(benchmark::State& state,
- arrow::compute::Expression expr, int32_t
num_batches,
- int32_t batch_size,
- arrow::compute::BatchesWithSchema data,
- std::string factory_name,
- arrow::compute::ExecNodeOptions& options)
{
+Status BenchmarkIsolatedNodeOverhead(
+ benchmark::State& state, arrow::compute::Expression expr, int32_t
num_batches,
+ int32_t batch_size, arrow::compute::BatchesWithSchema data, std::string
factory_name,
+ arrow::compute::ExecNodeOptions& options, arrow::MemoryPool* pool) {
for (auto _ : state) {
state.PauseTiming();
AsyncGenerator<std::optional<arrow::compute::ExecBatch>> sink_gen;
+ ExecContext ctx(pool, ::arrow::internal::GetCpuThreadPool());
+
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::compute::ExecPlan> plan,
- arrow::compute::ExecPlan::Make());
+ arrow::compute::ExecPlan::Make(ctx));
// Source and sink nodes have no effect on the benchmark.
// Used for dummy purposes as they are referenced in InputReceived and
InputFinished.
ARROW_ASSIGN_OR_RAISE(arrow::compute::ExecNode * source_node,
@@ -108,14 +108,15 @@ Status BenchmarkIsolatedNodeOverhead(benchmark::State&
state,
// Generates batches from data, then benchmark rows_per_second and
batches_per_second for
// a source -> node_declarations -> sink sequence.
-Status BenchmarkNodeOverhead(
- benchmark::State& state, int32_t num_batches, int32_t batch_size,
- arrow::compute::BatchesWithSchema data,
- std::vector<arrow::compute::Declaration>& node_declarations) {
+Status BenchmarkNodeOverhead(benchmark::State& state, int32_t num_batches,
+ int32_t batch_size,
arrow::compute::BatchesWithSchema data,
+ std::vector<arrow::compute::Declaration>&
node_declarations,
+ MemoryPool* pool) {
+ ExecContext ctx(pool, ::arrow::internal::GetCpuThreadPool());
for (auto _ : state) {
state.PauseTiming();
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::compute::ExecPlan> plan,
- arrow::compute::ExecPlan::Make());
+ arrow::compute::ExecPlan::Make(ctx));
AsyncGenerator<std::optional<arrow::compute::ExecBatch>> sink_gen;
arrow::compute::Declaration source = arrow::compute::Declaration(
{"source",
diff --git a/cpp/src/arrow/compute/exec/benchmark_util.h
b/cpp/src/arrow/compute/exec/benchmark_util.h
index c66c2e91db..9965b5915d 100644
--- a/cpp/src/arrow/compute/exec/benchmark_util.h
+++ b/cpp/src/arrow/compute/exec/benchmark_util.h
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+#pragma once
+
#include <cstdint>
#include <string>
#include <vector>
@@ -31,14 +33,16 @@ namespace compute {
Status BenchmarkNodeOverhead(benchmark::State& state, int32_t num_batches,
int32_t batch_size,
arrow::compute::BatchesWithSchema data,
- std::vector<arrow::compute::Declaration>&
node_declarations);
+ std::vector<arrow::compute::Declaration>&
node_declarations,
+ arrow::MemoryPool* pool = default_memory_pool());
Status BenchmarkIsolatedNodeOverhead(benchmark::State& state,
arrow::compute::Expression expr, int32_t
num_batches,
int32_t batch_size,
arrow::compute::BatchesWithSchema data,
std::string factory_name,
- arrow::compute::ExecNodeOptions& options);
+ arrow::compute::ExecNodeOptions& options,
+ arrow::MemoryPool* pool =
default_memory_pool());
} // namespace compute
} // namespace arrow
diff --git a/cpp/src/arrow/csv/converter_benchmark.cc
b/cpp/src/arrow/csv/converter_benchmark.cc
index b55c11e4db..8cd771a7da 100644
--- a/cpp/src/arrow/csv/converter_benchmark.cc
+++ b/cpp/src/arrow/csv/converter_benchmark.cc
@@ -29,11 +29,14 @@
#include "arrow/csv/test_common.h"
#include "arrow/io/memory.h"
#include "arrow/testing/gtest_util.h"
+#include "arrow/util/benchmark_util.h"
#include "arrow/util/value_parsing.h"
namespace arrow {
namespace csv {
+::arrow::BenchmarkMemoryTracker memory_tracker;
+
static std::shared_ptr<BlockParser> BuildFromExamples(
const std::vector<std::string>& base_rows, int32_t num_rows) {
std::vector<std::string> rows;
@@ -42,7 +45,8 @@ static std::shared_ptr<BlockParser> BuildFromExamples(
}
std::shared_ptr<BlockParser> result;
- MakeCSVParser(rows, &result);
+ MakeCSVParser(rows, ParseOptions::Defaults(), -1,
memory_tracker.memory_pool(),
+ &result);
return result;
}
@@ -84,7 +88,8 @@ static void BenchmarkConversion(benchmark::State& state, //
NOLINT non-const re
BlockParser& parser,
const std::shared_ptr<DataType>& type,
ConvertOptions options) {
- std::shared_ptr<Converter> converter = *Converter::Make(type, options);
+ std::shared_ptr<Converter> converter =
+ *Converter::Make(type, options, memory_tracker.memory_pool());
while (state.KeepRunning()) {
auto converted = *converter->Convert(parser, 0 /* col_index */);
diff --git a/cpp/src/arrow/csv/test_common.cc b/cpp/src/arrow/csv/test_common.cc
index 648ad18e3c..eeefa10f58 100644
--- a/cpp/src/arrow/csv/test_common.cc
+++ b/cpp/src/arrow/csv/test_common.cc
@@ -31,9 +31,9 @@ std::string MakeCSVData(std::vector<std::string> lines) {
}
void MakeCSVParser(std::vector<std::string> lines, ParseOptions options,
int32_t num_cols,
- std::shared_ptr<BlockParser>* out) {
+ MemoryPool* pool, std::shared_ptr<BlockParser>* out) {
auto csv = MakeCSVData(lines);
- auto parser = std::make_shared<BlockParser>(options, num_cols);
+ auto parser = std::make_shared<BlockParser>(pool, options, num_cols);
uint32_t out_size;
ASSERT_OK(parser->Parse(std::string_view(csv), &out_size));
ASSERT_EQ(out_size, csv.size()) << "trailing CSV data not parsed";
@@ -42,7 +42,7 @@ void MakeCSVParser(std::vector<std::string> lines,
ParseOptions options, int32_t
void MakeCSVParser(std::vector<std::string> lines, ParseOptions options,
std::shared_ptr<BlockParser>* out) {
- return MakeCSVParser(lines, options, -1, out);
+ return MakeCSVParser(lines, options, -1, default_memory_pool(), out);
}
void MakeCSVParser(std::vector<std::string> lines,
std::shared_ptr<BlockParser>* out) {
@@ -57,7 +57,7 @@ void MakeColumnParser(std::vector<std::string> items,
std::shared_ptr<BlockParse
for (const auto& item : items) {
lines.push_back(item + '\n');
}
- MakeCSVParser(lines, options, 1, out);
+ MakeCSVParser(lines, options, 1, default_memory_pool(), out);
ASSERT_EQ((*out)->num_cols(), 1) << "Should have seen only 1 CSV column";
ASSERT_EQ((*out)->num_rows(), items.size());
}
diff --git a/cpp/src/arrow/csv/test_common.h b/cpp/src/arrow/csv/test_common.h
index 810a0b4721..07a4160447 100644
--- a/cpp/src/arrow/csv/test_common.h
+++ b/cpp/src/arrow/csv/test_common.h
@@ -34,7 +34,7 @@ std::string MakeCSVData(std::vector<std::string> lines);
// Make a BlockParser from a vector of lines representing a CSV file
ARROW_TESTING_EXPORT
void MakeCSVParser(std::vector<std::string> lines, ParseOptions options,
int32_t num_cols,
- std::shared_ptr<BlockParser>* out);
+ MemoryPool* pool, std::shared_ptr<BlockParser>* out);
ARROW_TESTING_EXPORT
void MakeCSVParser(std::vector<std::string> lines, ParseOptions options,
diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc
index ba8ecd54b5..088e641de3 100644
--- a/cpp/src/arrow/io/file_test.cc
+++ b/cpp/src/arrow/io/file_test.cc
@@ -462,9 +462,11 @@ class MyMemoryPool : public MemoryPool {
int64_t bytes_allocated() const override { return -1; }
+ int64_t total_bytes_allocated() const override { return -1; }
+
std::string backend_name() const override { return "my"; }
- int64_t num_allocations() const { return num_allocations_.load(); }
+ int64_t num_allocations() const override { return num_allocations_.load(); }
private:
std::atomic<int64_t> num_allocations_;
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index c87fdc6884..843329c17b 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -509,7 +509,7 @@ class BaseMemoryPoolImpl : public MemoryPool {
#endif
Allocator::DeallocateAligned(buffer, size, alignment);
- stats_.UpdateAllocatedBytes(-size);
+ stats_.UpdateAllocatedBytes(-size, /*is_free*/ true);
}
void ReleaseUnused() override { Allocator::ReleaseUnused(); }
@@ -518,6 +518,12 @@ class BaseMemoryPoolImpl : public MemoryPool {
int64_t max_memory() const override { return stats_.max_memory(); }
+ int64_t total_bytes_allocated() const override {
+ return stats_.total_bytes_allocated();
+ }
+
+ int64_t num_allocations() const override { return stats_.num_allocations(); }
+
protected:
internal::MemoryPoolStats stats_;
};
@@ -732,6 +738,18 @@ int64_t LoggingMemoryPool::max_memory() const {
return mem;
}
+int64_t LoggingMemoryPool::total_bytes_allocated() const {
+ int64_t mem = pool_->total_bytes_allocated();
+ std::cout << "total_bytes_allocated: " << mem << std::endl;
+ return mem;
+}
+
+int64_t LoggingMemoryPool::num_allocations() const {
+ int64_t mem = pool_->num_allocations();
+ std::cout << "num_allocations: " << mem << std::endl;
+ return mem;
+}
+
std::string LoggingMemoryPool::backend_name() const { return
pool_->backend_name(); }
///////////////////////////////////////////////////////////////////////
@@ -756,13 +774,17 @@ class ProxyMemoryPool::ProxyMemoryPoolImpl {
void Free(uint8_t* buffer, int64_t size, int64_t alignment) {
pool_->Free(buffer, size, alignment);
- stats_.UpdateAllocatedBytes(-size);
+ stats_.UpdateAllocatedBytes(-size, /*is_free=*/true);
}
int64_t bytes_allocated() const { return stats_.bytes_allocated(); }
int64_t max_memory() const { return stats_.max_memory(); }
+ int64_t total_bytes_allocated() const { return
stats_.total_bytes_allocated(); }
+
+ int64_t num_allocations() const { return stats_.num_allocations(); }
+
std::string backend_name() const { return pool_->backend_name(); }
private:
@@ -793,6 +815,12 @@ int64_t ProxyMemoryPool::bytes_allocated() const { return
impl_->bytes_allocated
int64_t ProxyMemoryPool::max_memory() const { return impl_->max_memory(); }
+int64_t ProxyMemoryPool::total_bytes_allocated() const {
+ return impl_->total_bytes_allocated();
+}
+
+int64_t ProxyMemoryPool::num_allocations() const { return
impl_->num_allocations(); }
+
std::string ProxyMemoryPool::backend_name() const { return
impl_->backend_name(); }
std::vector<std::string> SupportedMemoryBackendNames() {
diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h
index 4672dfb338..712a828041 100644
--- a/cpp/src/arrow/memory_pool.h
+++ b/cpp/src/arrow/memory_pool.h
@@ -43,18 +43,36 @@ class MemoryPoolStats {
int64_t bytes_allocated() const { return bytes_allocated_.load(); }
- inline void UpdateAllocatedBytes(int64_t diff) {
+ int64_t total_bytes_allocated() const { return
total_allocated_bytes_.load(); }
+
+ int64_t num_allocations() const { return num_allocs_.load(); }
+
+ inline void UpdateAllocatedBytes(int64_t diff, bool is_free = false) {
auto allocated = bytes_allocated_.fetch_add(diff) + diff;
// "maximum" allocated memory is ill-defined in multi-threaded code,
// so don't try to be too rigorous here
if (diff > 0 && allocated > max_memory_) {
max_memory_ = allocated;
}
+
+ // Reallocations might just expand/contract the allocation in place or
might
+ // copy to a new location. We can't really know, so we just represent the
+ // optimistic case.
+ if (diff > 0) {
+ total_allocated_bytes_ += diff;
+ }
+
+ // We count any reallocation as a allocation.
+ if (!is_free) {
+ num_allocs_ += 1;
+ }
}
protected:
- std::atomic<int64_t> bytes_allocated_;
- std::atomic<int64_t> max_memory_;
+ std::atomic<int64_t> bytes_allocated_ = 0;
+ std::atomic<int64_t> max_memory_ = 0;
+ std::atomic<int64_t> total_allocated_bytes_ = 0;
+ std::atomic<int64_t> num_allocs_ = 0;
};
} // namespace internal
@@ -119,6 +137,12 @@ class ARROW_EXPORT MemoryPool {
/// returns -1
virtual int64_t max_memory() const;
+ /// The number of bytes that were allocated.
+ virtual int64_t total_bytes_allocated() const = 0;
+
+ /// The number of allocations or reallocations that were requested.
+ virtual int64_t num_allocations() const = 0;
+
/// The name of the backend used by this MemoryPool (e.g. "system" or
"jemalloc").
virtual std::string backend_name() const = 0;
@@ -144,6 +168,10 @@ class ARROW_EXPORT LoggingMemoryPool : public MemoryPool {
int64_t max_memory() const override;
+ int64_t total_bytes_allocated() const override;
+
+ int64_t num_allocations() const override;
+
std::string backend_name() const override;
private:
@@ -172,6 +200,10 @@ class ARROW_EXPORT ProxyMemoryPool : public MemoryPool {
int64_t max_memory() const override;
+ int64_t total_bytes_allocated() const override;
+
+ int64_t num_allocations() const override;
+
std::string backend_name() const override;
private:
diff --git a/cpp/src/arrow/memory_pool_test.cc
b/cpp/src/arrow/memory_pool_test.cc
index a227226545..81d9d69ba3 100644
--- a/cpp/src/arrow/memory_pool_test.cc
+++ b/cpp/src/arrow/memory_pool_test.cc
@@ -110,7 +110,8 @@ TEST(DefaultMemoryPool, Identity) {
// googletest documentation
#if !(defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER))
-TEST(DefaultMemoryPoolDeathTest, MaxMemory) {
+// TODO: is this still a death test?
+TEST(DefaultMemoryPoolDeathTest, Statistics) {
MemoryPool* pool = default_memory_pool();
uint8_t* data1;
uint8_t* data2;
@@ -118,11 +119,32 @@ TEST(DefaultMemoryPoolDeathTest, MaxMemory) {
ASSERT_OK(pool->Allocate(100, &data1));
ASSERT_OK(pool->Allocate(50, &data2));
pool->Free(data2, 50);
- ASSERT_OK(pool->Allocate(100, &data2));
- pool->Free(data1, 100);
- pool->Free(data2, 100);
- ASSERT_EQ(200, pool->max_memory());
+ ASSERT_EQ(150, pool->max_memory());
+ ASSERT_EQ(150, pool->total_bytes_allocated());
+ ASSERT_EQ(100, pool->bytes_allocated());
+ ASSERT_EQ(2, pool->num_allocations());
+
+ ASSERT_OK(pool->Reallocate(100, 150, &data1)); // Grow data1
+
+ ASSERT_EQ(150, pool->max_memory());
+ ASSERT_EQ(150 + 50, pool->total_bytes_allocated());
+ ASSERT_EQ(150, pool->bytes_allocated());
+ ASSERT_EQ(3, pool->num_allocations());
+
+ ASSERT_OK(pool->Reallocate(150, 50, &data1)); // Shrink data1
+
+ ASSERT_EQ(150, pool->max_memory());
+ ASSERT_EQ(200, pool->total_bytes_allocated());
+ ASSERT_EQ(50, pool->bytes_allocated());
+ ASSERT_EQ(4, pool->num_allocations());
+
+ pool->Free(data1, 50);
+
+ ASSERT_EQ(150, pool->max_memory());
+ ASSERT_EQ(200, pool->total_bytes_allocated());
+ ASSERT_EQ(0, pool->bytes_allocated());
+ ASSERT_EQ(4, pool->num_allocations());
}
#endif // ARROW_VALGRIND
diff --git a/cpp/src/arrow/stl_allocator.h b/cpp/src/arrow/stl_allocator.h
index 9d921d4344..a1f4ae9feb 100644
--- a/cpp/src/arrow/stl_allocator.h
+++ b/cpp/src/arrow/stl_allocator.h
@@ -130,13 +130,19 @@ class STLMemoryPool : public MemoryPool {
void Free(uint8_t* buffer, int64_t size, int64_t /*alignment*/) override {
alloc_.deallocate(buffer, size);
- stats_.UpdateAllocatedBytes(-size);
+ stats_.UpdateAllocatedBytes(-size, /*is_free=*/true);
}
int64_t bytes_allocated() const override { return stats_.bytes_allocated(); }
int64_t max_memory() const override { return stats_.max_memory(); }
+ int64_t total_bytes_allocated() const override {
+ return stats_.total_bytes_allocated();
+ }
+
+ int64_t num_allocations() const override { return stats_.num_allocations(); }
+
std::string backend_name() const override { return "stl"; }
private:
diff --git a/cpp/src/arrow/util/benchmark_util.h
b/cpp/src/arrow/util/benchmark_util.h
index 5a5f51df5a..19abb7e1b3 100644
--- a/cpp/src/arrow/util/benchmark_util.h
+++ b/cpp/src/arrow/util/benchmark_util.h
@@ -21,7 +21,10 @@
#include "benchmark/benchmark.h"
+#include "arrow/memory_pool.h"
+#include "arrow/type_fwd.h"
#include "arrow/util/cpu_info.h"
+#include "arrow/util/logging.h" // IWYU pragma: keep
namespace arrow {
@@ -136,4 +139,66 @@ struct RegressionArgs {
bool size_is_bytes_;
};
+class MemoryPoolMemoryManager : public benchmark::MemoryManager {
+ void Start() override {
+ memory_pool = std::make_shared<ProxyMemoryPool>(default_memory_pool());
+
+ MemoryPool* default_pool = default_memory_pool();
+ global_allocations_start = default_pool->num_allocations();
+ }
+
+ void Stop(benchmark::MemoryManager::Result* result) override {
+ // If num_allocations is still zero, we assume that the memory pool wasn't
passed down
+ // so we should record them.
+ MemoryPool* default_pool = default_memory_pool();
+ int64_t new_default_allocations =
+ default_pool->num_allocations() - global_allocations_start;
+
+ // Only record metrics metrics if (1) there were allocations and (2) we
+ // recorded at least one.
+ if (new_default_allocations > 0 && memory_pool->num_allocations() > 0) {
+ if (new_default_allocations > memory_pool->num_allocations()) {
+ // If we missed some, let's report that.
+ int64_t missed_allocations =
+ new_default_allocations - memory_pool->num_allocations();
+ ARROW_LOG(WARNING) << "BenchmarkMemoryTracker recorded some
allocations "
+ << "for a benchmark, but missed " <<
missed_allocations
+ << " allocations.\n";
+ }
+
+ result->max_bytes_used = memory_pool->max_memory();
+ result->total_allocated_bytes = memory_pool->total_bytes_allocated();
+ result->num_allocs = memory_pool->num_allocations();
+ }
+ }
+
+ public:
+ std::shared_ptr<::arrow::ProxyMemoryPool> memory_pool;
+
+ protected:
+ int64_t global_allocations_start;
+};
+
+/// \brief Track memory pool allocations in benchmarks.
+///
+/// Instantiate as a global variable to register the hooks into Google
Benchmark
+/// to collect memory metrics. Before each benchmark, a new ProxyMemoryPool is
+/// created. It can then be accessed with memory_pool(). Once the benchmark is
+/// complete, the hook will record the maximum memory used, the total bytes
+/// allocated, and the total number of allocations. If no allocations were
seen,
+/// (for example, if you forgot to pass down the memory pool), then these
metrics
+/// will not be saved.
+///
+/// Since this is used as one global variable, this will not work if multiple
+/// benchmarks are run concurrently or for multi-threaded benchmarks (ones
+/// that use `->ThreadRange(...)`).
+class BenchmarkMemoryTracker {
+ public:
+ BenchmarkMemoryTracker() : manager_() {
::benchmark::RegisterMemoryManager(&manager_); }
+ ::arrow::MemoryPool* memory_pool() const { return
manager_.memory_pool.get(); }
+
+ protected:
+ ::arrow::MemoryPoolMemoryManager manager_;
+};
+
} // namespace arrow
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 56b2000c84..79e181c957 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -61,8 +61,8 @@ ARROW_CARES_BUILD_VERSION=1.17.2
ARROW_CARES_BUILD_SHA256_CHECKSUM=4803c844ce20ce510ef0eb83f8ea41fa24ecaae9d280c468c582d2bb25b3913d
ARROW_CRC32C_BUILD_VERSION=1.1.2
ARROW_CRC32C_BUILD_SHA256_CHECKSUM=ac07840513072b7fcebda6e821068aa04889018f24e10e46181068fb214d7e56
-ARROW_GBENCHMARK_BUILD_VERSION=v1.6.0
-ARROW_GBENCHMARK_BUILD_SHA256_CHECKSUM=1f71c72ce08d2c1310011ea6436b31e39ccab8c2db94186d26657d41747c85d6
+ARROW_GBENCHMARK_BUILD_VERSION=v1.7.1
+ARROW_GBENCHMARK_BUILD_SHA256_CHECKSUM=6430e4092653380d9dc4ccb45a1e2dc9259d581f4866dc0759713126056bc1d7
ARROW_GFLAGS_BUILD_VERSION=v2.2.2
ARROW_GFLAGS_BUILD_SHA256_CHECKSUM=34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf
ARROW_GLOG_BUILD_VERSION=v0.5.0
diff --git a/java/dataset/src/main/cpp/jni_util.cc
b/java/dataset/src/main/cpp/jni_util.cc
index 0aba2e5121..f2f6871973 100644
--- a/java/dataset/src/main/cpp/jni_util.cc
+++ b/java/dataset/src/main/cpp/jni_util.cc
@@ -115,6 +115,10 @@ class ReservationListenableMemoryPool::Impl {
int64_t max_memory() { return stats_.max_memory(); }
+ int64_t total_bytes_allocated() { return stats_.total_bytes_allocated(); }
+
+ int64_t num_allocations() { return stats_.num_allocations(); }
+
std::string backend_name() { return pool_->backend_name(); }
std::shared_ptr<ReservationListener> get_listener() { return listener_; }
@@ -158,6 +162,14 @@ int64_t ReservationListenableMemoryPool::max_memory()
const {
return impl_->max_memory();
}
+int64_t ReservationListenableMemoryPool::total_bytes_allocated() const {
+ return impl_->total_bytes_allocated();
+}
+
+int64_t ReservationListenableMemoryPool::num_allocations() const {
+ return impl_->num_allocations();
+}
+
std::string ReservationListenableMemoryPool::backend_name() const {
return impl_->backend_name();
}
diff --git a/java/dataset/src/main/cpp/jni_util.h
b/java/dataset/src/main/cpp/jni_util.h
index 5697a82c8d..20482a6c54 100644
--- a/java/dataset/src/main/cpp/jni_util.h
+++ b/java/dataset/src/main/cpp/jni_util.h
@@ -158,6 +158,10 @@ class ReservationListenableMemoryPool : public
arrow::MemoryPool {
int64_t max_memory() const override;
+ int64_t total_bytes_allocated() const override;
+
+ int64_t num_allocations() const override;
+
std::string backend_name() const override;
std::shared_ptr<ReservationListener> get_listener();
diff --git a/r/src/memorypool.cpp b/r/src/memorypool.cpp
index 7c3deec98f..027aa8ef2a 100644
--- a/r/src/memorypool.cpp
+++ b/r/src/memorypool.cpp
@@ -45,6 +45,12 @@ class GcMemoryPool : public arrow::MemoryPool {
int64_t max_memory() const override { return pool_->max_memory(); }
+ int64_t total_bytes_allocated() const override {
+ return pool_->total_bytes_allocated();
+ }
+
+ int64_t num_allocations() const override { return pool_->num_allocations(); }
+
std::string backend_name() const override { return pool_->backend_name(); }
private: