westonpace commented on code in PR #33731:
URL: https://github.com/apache/arrow/pull/33731#discussion_r1097451546
##########
cpp/src/arrow/util/benchmark_util.h:
##########
@@ -21,7 +21,10 @@
#include "benchmark/benchmark.h"
+#include "arrow/memory_pool.h"
+#include "arrow/type_fwd.h"
#include "arrow/util/cpu_info.h"
+#include "arrow/util/logging.h" // IWYU pragma: keep
Review Comment:
We normally try to keep `logging.h` out of public headers. Though I don't
really know if `benchmark_util.h` is a public header or not :shrug:
Why is everything defined inline here instead of in an cc file?
##########
cpp/src/arrow/util/benchmark_util.h:
##########
@@ -136,4 +139,62 @@ struct RegressionArgs {
bool size_is_bytes_;
};
+class MemoryPoolMemoryManager : public benchmark::MemoryManager {
+ void Start() override {
+ memory_pool = std::make_shared<ProxyMemoryPool>(default_memory_pool());
+
+ MemoryPool* default_pool = default_memory_pool();
+ global_allocations_start = default_pool->num_allocations();
+ }
+
+ void Stop(benchmark::MemoryManager::Result* result) override {
+ // If num_allocations is still zero, we assume that the memory pool wasn't
passed down
+ // so we should record them.
+ MemoryPool* default_pool = default_memory_pool();
+ int64_t new_default_allocations =
+ default_pool->num_allocations() - global_allocations_start;
+
+ // Only record metrics metrics if (1) there were allocations and (2) we
+ // recorded at least one.
+ if (new_default_allocations > 0 && memory_pool->num_allocations() > 0) {
+ if (new_default_allocations > memory_pool->num_allocations()) {
+ // If we missed some, let's report that.
+ int64_t missed_allocations =
+ new_default_allocations - memory_pool->num_allocations();
+ ARROW_LOG(WARNING) << "BenchmarkMemoryTracker recorded some
allocations "
+ << "for a benchmark, but missed " <<
missed_allocations
+ << " allocations.\n";
+ }
+
+ result->max_bytes_used = memory_pool->max_memory();
+ result->total_allocated_bytes = memory_pool->total_bytes_allocated();
+ result->num_allocs = memory_pool->num_allocations();
+ }
+ }
+
+ public:
+ std::shared_ptr<::arrow::ProxyMemoryPool> memory_pool;
+
+ protected:
+ int64_t global_allocations_start;
+};
+
+/// \brief Track memory pool allocations in benchmarks.
+///
+/// Instantiate as a global variable to register the hooks into Google
Benchmark
+/// to collect memory metrics. Before each benchmark, a new ProxyMemoryPool is
+/// created. It can then be accessed with memory_pool(). Once the benchmark is
+/// complete, the hook will record the maximum memory used, the total bytes
+/// allocated, and the total number of allocations. If no allocations were
seen,
+/// (for example, if you forgot to pass down the memory pool), then these
metrics
+/// will not be saved.
Review Comment:
How does this work with multi-threaded benchmarks? Is the memory manager
invoked per thread in some way? Or one single memory manager shared across all
threads?
I suspect the latter but it might be good to clarify here for future readers.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]