(tvm) 02/19: (Hacky) VM allocator changes to support batched serving with memory profiling

masahi Mon, 18 Dec 2023 01:58:07 -0800

This is an automated email from the ASF dual-hosted git repository.

masahi pushed a commit to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git


commit adb9edfff08d9e0d7932e191497ba194cf737fa6
Author: Masahiro Masuda <masahi...@gmail.com>
AuthorDate: Mon Nov 6 04:36:28 2023 +0000

    (Hacky) VM allocator changes to support batched serving with memory profiling
---
 include/tvm/runtime/memory/memory_manager.h |  4 +++-
 src/runtime/memory/memory_manager.cc        | 17 +++++++++++++++++
 src/runtime/memory/naive_allocator.h        |  2 +-
 src/runtime/memory/pooled_allocator.h       | 29 +++++++++++++++++++++++++++--
 4 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/include/tvm/runtime/memory/memory_manager.h b/include/tvm/runtime/memory/memory_manager.h
index 8e4ed4875e..0f720de940 100644
--- a/include/tvm/runtime/memory/memory_manager.h
+++ b/include/tvm/runtime/memory/memory_manager.h
@@ -94,7 +94,7 @@ class Allocator {
   /*! \brief The amount of memory currently allocated.
    *  \return The amount of memory currently allocated.
    */
-  virtual size_t UsedMemory() const = 0;
+  virtual size_t UsedMemory() = 0;
 
  protected:
   virtual Buffer Alloc(Device dev, ShapeTuple shape, DLDataType type_hint,
@@ -124,6 +124,8 @@ class MemoryManager {
   /*! \brief Clear the allocators. */
   static void Clear();
 
+  static size_t UsedMemory(Device dev);
+
  private:
   MemoryManager() {}
 
diff --git a/src/runtime/memory/memory_manager.cc b/src/runtime/memory/memory_manager.cc
index e903f514f8..013c3dea64 100644
--- a/src/runtime/memory/memory_manager.cc
+++ b/src/runtime/memory/memory_manager.cc
@@ -177,6 +177,19 @@ void MemoryManager::Clear() {
   }
 }
 
+size_t MemoryManager::UsedMemory(Device dev) {
+  MemoryManager* m = MemoryManager::Global();
+  std::lock_guard<std::mutex> lock(m->mu_);
+  auto alloc_type = AllocatorType::kPooled;
+  if (m->allocators_.count(dev)) {
+    return m->allocators_.at(dev).at(alloc_type)->UsedMemory();
+  }
+  // For Disco, all devices will be queried with the same `dev`. When the device ID of the
+  // queried device is different from the one used by this VM instance, we cannot return
+  // a meaningful value.
+  return 0;
+}
+
 NDArray Allocator::Empty(ShapeTuple shape, DLDataType dtype, DLDevice dev,
                          Optional<String> mem_scope) {
   VerifyDataType(dtype);
@@ -217,6 +230,10 @@ void Allocator::Clear() {
 
 TVM_REGISTER_GLOBAL("vm.builtin.memory_manager.clear").set_body_typed(MemoryManager::Clear);
 
+TVM_REGISTER_GLOBAL("vm.memory_manager.get_used_memory").set_body_typed([](Device dev) {
+  return static_cast<int64_t>(MemoryManager::UsedMemory(dev));
+});
+
 }  // namespace memory
 }  // namespace runtime
 }  // namespace tvm
diff --git a/src/runtime/memory/naive_allocator.h b/src/runtime/memory/naive_allocator.h
index 4ab96bdfd5..968ad82090 100644
--- a/src/runtime/memory/naive_allocator.h
+++ b/src/runtime/memory/naive_allocator.h
@@ -79,7 +79,7 @@ class NaiveAllocator final : public Allocator {
     DLOG(INFO) << "free " << buffer.size << " B, used memory " << used_memory_ << " B";
   }
 
-  size_t UsedMemory() const override { return used_memory_.load(std::memory_order_relaxed); }
+  size_t UsedMemory() override { return used_memory_.load(std::memory_order_relaxed); }
 
  private:
   std::atomic<size_t> used_memory_;
diff --git a/src/runtime/memory/pooled_allocator.h b/src/runtime/memory/pooled_allocator.h
index 826af49e5a..88d858a27c 100644
--- a/src/runtime/memory/pooled_allocator.h
+++ b/src/runtime/memory/pooled_allocator.h
@@ -26,6 +26,7 @@
 #include <tvm/runtime/device_api.h>
 #include <tvm/runtime/memory/memory_manager.h>
 
+#include <algorithm>
 #include <atomic>
 #include <mutex>
 #include <string>
@@ -53,14 +54,31 @@ class PooledAllocator final : public Allocator {
       auto&& pool = it->second;
       auto ret = pool.back();
       pool.pop_back();
+      auto it2 = free_sizes_.find(size);
+      ICHECK(it2 != free_sizes_.end());
+      free_sizes_.erase(it2);
       return ret;
     }
+
+    if (recycle_eager) {
+      if (auto it = std::lower_bound(free_sizes_.begin(), free_sizes_.end(), size);
+          it != free_sizes_.end()) {
+        auto&& pool = memory_pool_[*it];
+        auto ret = pool.back();
+        ICHECK(ret.size > 0);
+        pool.pop_back();
+        free_sizes_.erase(it);
+        return ret;
+      }
+    }
+
     Buffer buf;
     buf.device = device_;
     buf.size = size;
     buf.alloc_type = kPooled;
     try {
-      buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, size, alignment, type_hint);
+      buf.data =
+          runtime::DeviceAPI::Get(device_)->AllocDataSpace(device_, size, alignment, type_hint);
     } catch (InternalError& err) {
       LOG(WARNING) << "PooledAllocator got InternalError during allocation: " << err.message();
       LOG(WARNING) << "Trying to release all unused memory and reallocate...";
@@ -88,11 +106,16 @@ class PooledAllocator final : public Allocator {
     }
     memory_pool_.at(buffer.size).push_back(buffer);
     VLOG(1) << "reclaim buffer " << buffer.size;
+    free_sizes_.insert(buffer.size);
   }
 
   void Clear() override { ReleaseAll(); }
 
-  size_t UsedMemory() const override { return used_memory_.load(std::memory_order_relaxed); }
+  size_t UsedMemory() override {
+    // HACK: eager recycling stays off during memory profiling; the first UsedMemory() call enables it
+    recycle_eager = true;
+    return used_memory_.load(std::memory_order_relaxed);
+  }
 
  private:
   void ReleaseAll() {
@@ -113,7 +136,9 @@ class PooledAllocator final : public Allocator {
   std::atomic<size_t> used_memory_;
   std::unordered_map<size_t, std::vector<Buffer>> memory_pool_;
   std::recursive_mutex mu_;
+  std::multiset<size_t> free_sizes_;
   Device device_;
+  bool recycle_eager = false;
 };
 
 }  // namespace memory

(tvm) 02/19: (Hacky) VM allocator changes to support batched serving with memory profiling

Reply via email to