This is an automated email from the ASF dual-hosted git repository. masahi pushed a commit to branch unity in repository https://gitbox.apache.org/repos/asf/tvm.git
commit adb9edfff08d9e0d7932e191497ba194cf737fa6 Author: Masahiro Masuda <masahi...@gmail.com> AuthorDate: Mon Nov 6 04:36:28 2023 +0000 (Hacky) VM allocator changes to support batched serving with memory profiling --- include/tvm/runtime/memory/memory_manager.h | 4 +++- src/runtime/memory/memory_manager.cc | 17 +++++++++++++++++ src/runtime/memory/naive_allocator.h | 2 +- src/runtime/memory/pooled_allocator.h | 29 +++++++++++++++++++++++++++-- 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/include/tvm/runtime/memory/memory_manager.h b/include/tvm/runtime/memory/memory_manager.h index 8e4ed4875e..0f720de940 100644 --- a/include/tvm/runtime/memory/memory_manager.h +++ b/include/tvm/runtime/memory/memory_manager.h @@ -94,7 +94,7 @@ class Allocator { /*! \brief The amount of memory currently allocated. * \return The amount of memory currently allocated. */ - virtual size_t UsedMemory() const = 0; + virtual size_t UsedMemory() = 0; protected: virtual Buffer Alloc(Device dev, ShapeTuple shape, DLDataType type_hint, @@ -124,6 +124,8 @@ class MemoryManager { /*! \brief Clear the allocators. */ static void Clear(); + static size_t UsedMemory(Device dev); + private: MemoryManager() {} diff --git a/src/runtime/memory/memory_manager.cc b/src/runtime/memory/memory_manager.cc index e903f514f8..013c3dea64 100644 --- a/src/runtime/memory/memory_manager.cc +++ b/src/runtime/memory/memory_manager.cc @@ -177,6 +177,19 @@ void MemoryManager::Clear() { } } +size_t MemoryManager::UsedMemory(Device dev) { + MemoryManager* m = MemoryManager::Global(); + std::lock_guard<std::mutex> lock(m->mu_); + auto alloc_type = AllocatorType::kPooled; + if (m->allocators_.count(dev)) { + return m->allocators_.at(dev).at(alloc_type)->UsedMemory(); + } + // For Disco, all devices will be queried with the same `dev`. When the device ID of the + // queried device is different from the one used by this VM instance, we cannot return + // a meaningful value. + return 0; +} + NDArray Allocator::Empty(ShapeTuple shape, DLDataType dtype, DLDevice dev, Optional<String> mem_scope) { VerifyDataType(dtype); @@ -217,6 +230,10 @@ void Allocator::Clear() { TVM_REGISTER_GLOBAL("vm.builtin.memory_manager.clear").set_body_typed(MemoryManager::Clear); +TVM_REGISTER_GLOBAL("vm.memory_manager.get_used_memory").set_body_typed([](Device dev) { + return static_cast<int64_t>(MemoryManager::UsedMemory(dev)); +}); + } // namespace memory } // namespace runtime } // namespace tvm diff --git a/src/runtime/memory/naive_allocator.h b/src/runtime/memory/naive_allocator.h index 4ab96bdfd5..968ad82090 100644 --- a/src/runtime/memory/naive_allocator.h +++ b/src/runtime/memory/naive_allocator.h @@ -79,7 +79,7 @@ class NaiveAllocator final : public Allocator { DLOG(INFO) << "free " << buffer.size << " B, used memory " << used_memory_ << " B"; } - size_t UsedMemory() const override { return used_memory_.load(std::memory_order_relaxed); } + size_t UsedMemory() override { return used_memory_.load(std::memory_order_relaxed); } private: std::atomic<size_t> used_memory_; diff --git a/src/runtime/memory/pooled_allocator.h b/src/runtime/memory/pooled_allocator.h index 826af49e5a..88d858a27c 100644 --- a/src/runtime/memory/pooled_allocator.h +++ b/src/runtime/memory/pooled_allocator.h @@ -26,6 +26,7 @@ #include <tvm/runtime/device_api.h> #include <tvm/runtime/memory/memory_manager.h> +#include <algorithm> #include <atomic> #include <mutex> #include <string> @@ -53,14 +54,31 @@ class PooledAllocator final : public Allocator { auto&& pool = it->second; auto ret = pool.back(); pool.pop_back(); + auto it2 = free_sizes_.find(size); + ICHECK(it2 != free_sizes_.end()); + free_sizes_.erase(it2); return ret; } + + if (recycle_eager) { + if (auto it = std::lower_bound(free_sizes_.begin(), free_sizes_.end(), size); + it != free_sizes_.end()) { + auto&& pool = memory_pool_[*it]; + auto ret = pool.back(); + ICHECK(ret.size > 0); + pool.pop_back(); + free_sizes_.erase(it); + return ret; + } + } + Buffer buf; buf.device = device_; buf.size = size; buf.alloc_type = kPooled; try { - buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, size, alignment, type_hint); + buf.data = + runtime::DeviceAPI::Get(device_)->AllocDataSpace(device_, size, alignment, type_hint); } catch (InternalError& err) { LOG(WARNING) << "PooledAllocator got InternalError during allocation: " << err.message(); LOG(WARNING) << "Trying to release all unused memory and reallocate..."; @@ -88,11 +106,16 @@ class PooledAllocator final : public Allocator { } memory_pool_.at(buffer.size).push_back(buffer); VLOG(1) << "reclaim buffer " << buffer.size; + free_sizes_.insert(buffer.size); } void Clear() override { ReleaseAll(); } - size_t UsedMemory() const override { return used_memory_.load(std::memory_order_relaxed); } + size_t UsedMemory() override { + // HACK to disable eager recycling during memory profiling + recycle_eager = true; + return used_memory_.load(std::memory_order_relaxed); + } private: void ReleaseAll() { @@ -113,7 +136,9 @@ class PooledAllocator final : public Allocator { std::atomic<size_t> used_memory_; std::unordered_map<size_t, std::vector<Buffer>> memory_pool_; std::recursive_mutex mu_; + std::multiset<size_t> free_sizes_; Device device_; + bool recycle_eager = false; }; } // namespace memory