pitrou commented on a change in pull request #12116:
URL: https://github.com/apache/arrow/pull/12116#discussion_r805997122



##########
File path: cpp/src/arrow/memory_pool.cc
##########
@@ -603,14 +643,109 @@ class BaseMemoryPoolImpl : public MemoryPool {
     stats_.UpdateAllocatedBytes(-size);
   }
 
-  void ReleaseUnused() override { Allocator::ReleaseUnused(); }
+ protected:
+  virtual Status AllocateImmutableZeros(int64_t size, uint8_t** out) {
+#ifdef USE_MMAP_FOR_IMMUTABLE_ZEROS
+    if (size > 0) {
+      *out = static_cast<uint8_t*>(mmap(
+          nullptr, size, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS | 
MAP_NORESERVE, -1, 0));

Review comment:
       I'm not sure this is useful, because the underlying allocator probably 
already relies on mmap for large-ish allocations.

##########
File path: cpp/src/arrow/memory_pool.cc
##########
@@ -603,14 +643,109 @@ class BaseMemoryPoolImpl : public MemoryPool {
     stats_.UpdateAllocatedBytes(-size);
   }
 
-  void ReleaseUnused() override { Allocator::ReleaseUnused(); }
+ protected:
+  virtual Status AllocateImmutableZeros(int64_t size, uint8_t** out) {
+#ifdef USE_MMAP_FOR_IMMUTABLE_ZEROS
+    if (size > 0) {
+      *out = static_cast<uint8_t*>(mmap(
+          nullptr, size, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS | 
MAP_NORESERVE, -1, 0));
+      if (*out == MAP_FAILED) {
+        auto err = errno;
+        return Status::OutOfMemory("Failed to allocate zero buffer of size ", 
size, ": ",
+                                   strerror(err));
+      }
+      return Status::OK();
+    }
+#endif
+    RETURN_NOT_OK(Allocate(size, out));
+    std::memset(*out, 0, size);
+    return Status::OK();
+  }
+
+  void FreeImmutableZeros(uint8_t* buffer, int64_t size) override {
+#ifdef USE_MMAP_FOR_IMMUTABLE_ZEROS
+    if (size > 0) {
+      munmap(buffer, size);
+      return;
+    }
+#endif
+    Free(buffer, size);
+  }
+
+ public:
+  Result<std::shared_ptr<Buffer>> GetImmutableZeros(int64_t size) override {
+    // Thread-safely get the current largest buffer of zeros.

Review comment:
       This algorithm seems quite complicated and costly (in terms of 
inter-thread synchronization).
   Instead, how about maintaining lazily-allocated power-of-two-sized entries 
(perhaps starting with 4096 to avoid having a ton of tiny size classes)? 
Something like:
   ```c++
   
   static constexpr int kMinPower2Size = 12;  // 4096 bytes
   static constexpr int kNumAllocClasses = 64 - kMinPower2Size;
   // Addresses of allocated zeros, for each size class
   std::array<std::atomic<uintptr_t>, kNumAllocClasses> allocated_;
   std::mutex allocation_mutex_;
   
   Result<const uint8_t*> GetImmutableZeros(int64_t size) override {
     const auto size_class = std::max(0, bit_util::Log2(size) - kMinPower2Size);
     auto addr = allocated_[size_class].load();
     if (ARROW_PREDICT_TRUE(addr != nullptr)) {
       // Fast path: already allocated
       return reinterpret_cast<const uint8_t*>(addr);
     }
     // Slow path: allocate if not already done
     std::lock_guard<std::mutex> lock(allocation_mutex_);
     auto addr = allocated_[size_class].load();
     if (addr == nullptr) {
       const int64_t alloc_size = static_cast<int64_t>(1) << (size_class + 
kMinPower2Size);
       ARROW_ASSIGN_OR_RAISE(const uint8_t* data, AllocateImmutableZeros(size));
       allocated_[size_class] = addr = reinterpret_cast<uintptr_t>(data);
     }
     return reinterpret_cast<const uint8_t*>(addr);
   }
   ```

##########
File path: cpp/src/arrow/device.h
##########
@@ -223,4 +236,55 @@ class ARROW_EXPORT CPUMemoryManager : public MemoryManager 
{
 ARROW_EXPORT
 std::shared_ptr<MemoryManager> default_cpu_memory_manager();
 
+/// A memory manager that uses the immutable zeros interface of the given 
memory pool,
+/// rather than the normal mutable buffer interface.

Review comment:
       Hmm, I honestly don't understand why it would be useful to implement 
this, while we're already exposing additional methods to `MemoryPool`.

##########
File path: cpp/src/arrow/memory_pool.cc
##########
@@ -536,6 +543,39 @@ int64_t MemoryPool::max_memory() const { return -1; }
 // MemoryPool implementation that delegates its core duty
 // to an Allocator class.
 
+class ImmutableZeros : public Buffer {
+ public:
+  explicit ImmutableZeros(uint8_t* data, int64_t size, MemoryPool* pool)
+      : Buffer(data, size, CPUDevice::immutable_zeros_memory_manager(pool)),
+        pool_(pool) {}
+
+  ImmutableZeros() : Buffer(nullptr, 0), pool_(nullptr) {}
+
+  ~ImmutableZeros() override;
+
+  // Prevent copies and handle moves explicitly to avoid double free

Review comment:
       It sounds hackish to have a separate ImmutableZeros class, IMHO.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to