pitrou commented on code in PR #40647:
URL: https://github.com/apache/arrow/pull/40647#discussion_r1532146660
##########
cpp/src/arrow/memory_pool.h:
##########
@@ -35,44 +36,62 @@ namespace internal {
///////////////////////////////////////////////////////////////////////
// Helper tracking memory statistics
-class MemoryPoolStats {
- public:
- MemoryPoolStats() : bytes_allocated_(0), max_memory_(0) {}
-
- int64_t max_memory() const { return max_memory_.load(); }
-
- int64_t bytes_allocated() const { return bytes_allocated_.load(); }
-
- int64_t total_bytes_allocated() const { return
total_allocated_bytes_.load(); }
+class alignas(/*CacheLineSize=*/64) MemoryPoolStats {
+ private:
+ // All atomics are updated according to Acquire-Release ordering.
+ //
https://en.cppreference.com/w/cpp/atomic/memory_order#Release-Acquire_ordering
+ //
+ // max_memory_, total_allocated_bytes_, and num_allocs_ only go up (they are
+ // monotonically increasing) which can allow some optimizations.
+ std::atomic<int64_t> max_memory_{0};
+ std::atomic<int64_t> bytes_allocated_{0};
+ std::atomic<int64_t> total_allocated_bytes_{0};
+ std::atomic<int64_t> num_allocs_{0};
- int64_t num_allocations() const { return num_allocs_.load(); }
+ public:
+ int64_t max_memory() const { return
max_memory_.load(std::memory_order_acquire); }
- inline void UpdateAllocatedBytes(int64_t diff, bool is_free = false) {
- auto allocated = bytes_allocated_.fetch_add(diff) + diff;
- // "maximum" allocated memory is ill-defined in multi-threaded code,
- // so don't try to be too rigorous here
- if (diff > 0 && allocated > max_memory_) {
- max_memory_ = allocated;
- }
+ int64_t bytes_allocated() const {
+ return bytes_allocated_.load(std::memory_order_acquire);
+ }
- // Reallocations might just expand/contract the allocation in place or
might
- // copy to a new location. We can't really know, so we just represent the
- // optimistic case.
- if (diff > 0) {
- total_allocated_bytes_ += diff;
- }
+ int64_t total_bytes_allocated() const {
+ return total_allocated_bytes_.load(std::memory_order_acquire);
+ }
- // We count any reallocation as an allocation.
- if (!is_free) {
- num_allocs_ += 1;
+ int64_t num_allocations() const { return
num_allocs_.load(std::memory_order_acquire); }
+
+ template <bool IsFree = false>
+ inline void UpdateAllocatedBytes(int64_t diff) {
+ // max_memory_ is monotonically increasing, so we can use a relaxed load
+ // before the read-modify-write. Issue the load before everything else.
+ auto max_memory = max_memory_.load(std::memory_order_relaxed);
+ auto old_bytes_allocated =
+ bytes_allocated_.fetch_add(diff, std::memory_order_acq_rel);
+ if constexpr (IsFree) {
Review Comment:
I'll note that the desired goal should not be to improve assembler traces in
a theoretical way, but to reduce potential overhead (ideally with benchmark
numbers, but I realize those might require execution on some particular
machine).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]