(tvm) branch unity updated: [Unity] Replace relax_vm/memory_manager with memory/memory_manager (#15882)

tqchen Tue, 31 Oct 2023 10:37:35 -0700

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git



The following commit(s) were added to refs/heads/unity by this push:
     new a9c81a7cc1 [Unity] Replace relax_vm/memory_manager with memory/memory_manager (#15882)
a9c81a7cc1 is described below

commit a9c81a7cc114fd98065223a4d5be1952b06196b2
Author: Yong Wu <[email protected]>
AuthorDate: Tue Oct 31 10:37:25 2023 -0700

    [Unity] Replace relax_vm/memory_manager with memory/memory_manager (#15882)
---
 include/tvm/runtime/relax_vm/memory_manager.h | 152 ------------------
 include/tvm/runtime/relax_vm/vm.h             |   9 +-
 src/runtime/disco/builtin.cc                  |   4 +-
 src/runtime/relax_vm/builtin.cc               |   2 +-
 src/runtime/relax_vm/lm_support.cc            |  46 +-----
 src/runtime/relax_vm/memory_manager.cc        | 214 --------------------------
 src/runtime/relax_vm/naive_allocator.h        |  86 -----------
 src/runtime/relax_vm/pooled_allocator.h       | 111 -------------
 tests/python/relax/test_runtime_builtin.py    |  24 ---
 9 files changed, 12 insertions(+), 636 deletions(-)

diff --git a/include/tvm/runtime/relax_vm/memory_manager.h b/include/tvm/runtime/relax_vm/memory_manager.h
deleted file mode 100644
index ed939fb88f..0000000000
--- a/include/tvm/runtime/relax_vm/memory_manager.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file tvm/runtime/relax_vm/memory_manager.h
- * \brief Abstract device memory management API
- */
-#ifndef TVM_RUNTIME_RELAX_VM_MEMORY_MANAGER_H_
-#define TVM_RUNTIME_RELAX_VM_MEMORY_MANAGER_H_
-
-#include <tvm/runtime/c_runtime_api.h>
-#include <tvm/runtime/ndarray.h>
-
-#include <functional>
-#include <memory>
-#include <mutex>
-#include <unordered_map>
-#include <vector>
-
-namespace tvm {
-namespace runtime {
-namespace relax_vm {
-
-struct Buffer {
-  /*! \brief The pointer to the allocated block of memory. */
-  void* data{nullptr};
-  /*! \brief The size of the block. */
-  size_t size{0};
-  /*! \brief The device of the allocated buffers. */
-  Device device;
-};
-
-enum AllocatorType {
-  kNaive = 1,
-  kPooled,
-};
-
-class Allocator {
- public:
-  explicit Allocator(AllocatorType type) : type_(type) {}
-  virtual ~Allocator() = default;
-  /*! \brief Allocate an empty NDArray using from the allocator.
-   *  \param shape The shape of the NDArray.
-   *  \param dtype The datatype of the NDArray.
-   *  \param dev The device where the array is allocated.
-   *  \return The empty NDArray.
-   */
-  runtime::NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev);
-  /*! \brief Return the allocator type. */
-  inline AllocatorType type() const { return type_; }
-  /*! \brief Allocate a buffer given a size, alignment and type.
-   *  \param nbytes The size of the buffer.
-   *  \param alignment The alignment of the buffer.
-   *  \param type_hint A type hint to the allocator.
-   *  \return A sized allocation in the form of a buffer.
-   */
-  virtual Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) 
= 0;
-  /*! \brief Allocate a buffer given a size, alignment and type.
-   *  \param shape The shape of allocated tensor.
-   *  \param dtype A type hint to the allocator.
-   *  \param mem_scope The memory scope of allocated tensor.
-   *  \return A sized allocation in the form of a buffer.
-   */
-  virtual Buffer Alloc(ShapeTuple shape, DLDataType dtype, String mem_scope);
-  /*! \brief Free a buffer allocated by the allocator.
-   *  \param buffer The buffer to free.
-   */
-  virtual void Free(const Buffer& buffer) = 0;
-
- private:
-  AllocatorType type_;
-};
-
-class MemoryManager {
- public:
-  static MemoryManager* Global();
-  /*!
-   * \brief Get or create an allocator given the device and allocator type.
-   * \param dev The TVM device
-   * \param type The allocator type
-   * \return The memory allocator.
-   */
-  static Allocator* GetOrCreateAllocator(Device dev, AllocatorType type);
-  /*!
-   * \brief Get an allocator given the device.
-   * \param dev The TVM device
-   * \return The memory allocator.
-   */
-  static Allocator* GetAllocator(Device dev);
-
-  /*! \brief Clear the allocators. */
-  static void Clear();
-
- private:
-  MemoryManager() {}
-
- private:
-  std::mutex mutex_;
-  std::unordered_map<Device, std::unique_ptr<Allocator>> allocators_;
-};
-
-/*! \brief An object representing a storage allocation. */
-class StorageObj : public Object {
- public:
-  /*! \brief The index into the VM function table. */
-  Buffer buffer;
-
-  /*! \brief Allocate an NDArray from a given piece of storage. */
-  runtime::NDArray AllocNDArray(uint64_t offset, ShapeTuple shape, DLDataType 
dtype);
-
-  /*! \brief The deleter for an NDArray when allocated from underlying 
storage. */
-  static void Deleter(Object* ptr);
-
-  ~StorageObj() {
-    auto alloc = MemoryManager::Global()->GetAllocator(buffer.device);
-    alloc->Free(buffer);
-  }
-
-  static constexpr const uint32_t _type_index = runtime::TypeIndex::kDynamic;
-  static constexpr const char* _type_key = "relax.Storage";
-  TVM_DECLARE_FINAL_OBJECT_INFO(StorageObj, Object);
-};
-
-/*! \brief reference to storage. */
-class Storage : public ObjectRef {
- public:
-  explicit Storage(Buffer buffer);
-
-  TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Storage, ObjectRef, StorageObj);
-};
-
-}  // namespace relax_vm
-}  // namespace runtime
-}  // namespace tvm
-
-#endif  // TVM_RUNTIME_RELAX_VM_MEMORY_MANAGER_H_
diff --git a/include/tvm/runtime/relax_vm/vm.h b/include/tvm/runtime/relax_vm/vm.h
index 95a2080159..4e1f1361be 100644
--- a/include/tvm/runtime/relax_vm/vm.h
+++ b/include/tvm/runtime/relax_vm/vm.h
@@ -31,12 +31,19 @@
 #include <string>
 #include <vector>
 
+#include "../memory/memory_manager.h"
 #include "./bytecode.h"
 #include "./executable.h"
-#include "./memory_manager.h"
 
 namespace tvm {
 namespace runtime {
+
+using memory::Allocator;
+using memory::AllocatorType;
+using memory::MemoryManager;
+using memory::Storage;
+using memory::StorageObj;
+
 namespace relax_vm {
 
 /*!
diff --git a/src/runtime/disco/builtin.cc b/src/runtime/disco/builtin.cc
index 5aea39cf66..514d633fa6 100644
--- a/src/runtime/disco/builtin.cc
+++ b/src/runtime/disco/builtin.cc
@@ -62,8 +62,8 @@ Module LoadVMModule(std::string path, Device device) {
       << "ValueError: File `" << path
       << "` is not built by RelaxVM, because `vm_initialization` does not 
exist";
   vm_initialization(static_cast<int>(device.device_type), 
static_cast<int>(device.device_id),
-                    static_cast<int>(relax_vm::AllocatorType::kPooled), 
static_cast<int>(kDLCPU), 0,
-                    static_cast<int>(relax_vm::AllocatorType::kPooled));
+                    static_cast<int>(AllocatorType::kPooled), 
static_cast<int>(kDLCPU), 0,
+                    static_cast<int>(AllocatorType::kPooled));
   return mod;
 }
 
diff --git a/src/runtime/relax_vm/builtin.cc b/src/runtime/relax_vm/builtin.cc
index a764c34cfa..4cd711dea0 100644
--- a/src/runtime/relax_vm/builtin.cc
+++ b/src/runtime/relax_vm/builtin.cc
@@ -24,12 +24,12 @@
 #include <tvm/runtime/device_api.h>
 #include <tvm/runtime/logging.h>
 #include <tvm/runtime/memory.h>
+#include <tvm/runtime/memory/memory_manager.h>
 #include <tvm/runtime/ndarray.h>
 #include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/registry.h>
 #include <tvm/runtime/relax_vm/builtin.h>
 #include <tvm/runtime/relax_vm/bytecode.h>
-#include <tvm/runtime/relax_vm/memory_manager.h>
 #include <tvm/runtime/relax_vm/vm.h>
 
 #include "../runtime_base.h"
diff --git a/src/runtime/relax_vm/lm_support.cc b/src/runtime/relax_vm/lm_support.cc
index 5ff04ebf2a..e56a03fdea 100644
--- a/src/runtime/relax_vm/lm_support.cc
+++ b/src/runtime/relax_vm/lm_support.cc
@@ -39,8 +39,8 @@
 #include <tvm/runtime/device_api.h>
 #include <tvm/runtime/logging.h>
 #include <tvm/runtime/memory.h>
+#include <tvm/runtime/memory/memory_manager.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/relax_vm/memory_manager.h>
 #include <tvm/runtime/relax_vm/vm.h>
 
 #include <cmath>
@@ -245,56 +245,12 @@ class AttentionKVCache : public ObjectRef {
 
 TVM_REGISTER_OBJECT_TYPE(AttentionKVCacheObj);
 
-/*!
- * \brief Create multiple kv caches with same shape, from single memory 
allocation.
- * \param init_data The initial data to put into the cache. Ignored if 
init_fill_count is
- *        less than 0.
- * \param reserve_shape The shape of cache.
- * \param init_fill_count The initial row to fill into
- *        the cache.
- * \param num_caches Number of caches to create.
- */
-Array<AttentionKVCache> CreateMultipleKVCaches(NDArray init_data, ShapeTuple 
reserve_shape,
-                                               int init_fill_count, int 
num_caches) {
-  DLDataType dtype = init_data->dtype;
-
-  int64_t cache_size = (dtype.bits * dtype.lanes + 7) / 8;
-  for (const auto dim : reserve_shape) {
-    cache_size *= dim;
-  }
-
-  // Add padding to make each cache align to kAllocAlignment
-  using tvm::runtime::kAllocAlignment;
-  int64_t padding = (kAllocAlignment - cache_size % kAllocAlignment) % 
kAllocAlignment;
-  int64_t cache_offset = cache_size + padding;
-
-  Storage storage =
-      Storage(MemoryManager::GetOrCreateAllocator(init_data->device, 
AllocatorType::kNaive)
-                  ->Alloc(cache_offset * num_caches, kAllocAlignment, dtype));
-
-  Array<AttentionKVCache> result;
-  for (int i = 0; i < num_caches; ++i) {
-    auto c = make_object<AttentionKVCacheObj>();
-    c->data = storage->AllocNDArray(i * cache_offset, reserve_shape, dtype);
-    c->fill_count = 0;
-    if (init_fill_count > 0) {
-      c->Append(init_data);
-      c->fill_count = init_fill_count;
-    }
-    result.push_back(AttentionKVCache(c));
-  }
-  return result;
-}
-
 //-------------------------------------------------
 //  Register runtime functions
 //-------------------------------------------------
 TVM_REGISTER_GLOBAL("vm.builtin.attention_kv_cache_create")
     .set_body_typed(AttentionKVCache::Create);
 
-TVM_REGISTER_GLOBAL("vm.builtin.attention_kv_cache_create_multiple")
-    .set_body_typed(CreateMultipleKVCaches);
-
 AttentionKVCache AttentionKVCacheUpdate(AttentionKVCache cache, NDArray value) 
{
   cache->Update(value);
   return cache;
diff --git a/src/runtime/relax_vm/memory_manager.cc b/src/runtime/relax_vm/memory_manager.cc
deleted file mode 100644
index 66aaf473ea..0000000000
--- a/src/runtime/relax_vm/memory_manager.cc
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file tvm/runtime/relax_vm/memory_manager.cc
- * \brief Allocate and manage memory for the Relay VM.
- */
-#include <tvm/runtime/device_api.h>
-#include <tvm/runtime/memory.h>
-#include <tvm/runtime/registry.h>
-#include <tvm/runtime/relax_vm/memory_manager.h>
-
-#include <memory>
-#include <utility>
-
-#include "naive_allocator.h"
-#include "pooled_allocator.h"
-
-namespace tvm {
-namespace runtime {
-namespace relax_vm {
-
-static void BufferDeleter(Object* obj) {
-  auto* ptr = static_cast<runtime::NDArray::Container*>(obj);
-  ICHECK(ptr->manager_ctx != nullptr);
-  Buffer* buffer = reinterpret_cast<Buffer*>(ptr->manager_ctx);
-  MemoryManager::GetAllocator(buffer->device)->Free(*(buffer));
-  delete buffer;
-  delete ptr;
-}
-
-void StorageObj::Deleter(Object* obj) {
-  auto* ptr = static_cast<runtime::NDArray::Container*>(obj);
-  // When invoking AllocNDArray we don't own the underlying allocation
-  // and should not delete the buffer, but instead let it be reclaimed
-  // by the storage object's destructor.
-  //
-  // We did bump the reference count by 1 to keep alive the StorageObj
-  // allocation in case this NDArray is the sole owner.
-  //
-  // We decrement the object allowing for the buffer to release our
-  // reference count from allocation.
-  StorageObj* storage = reinterpret_cast<StorageObj*>(ptr->manager_ctx);
-  storage->DecRef();
-  delete ptr;
-}
-
-Storage::Storage(Buffer buffer) {
-  auto n = make_object<StorageObj>();
-  n->buffer = std::move(buffer);
-  data_ = std::move(n);
-}
-
-inline void VerifyDataType(DLDataType dtype) {
-  ICHECK_GE(dtype.lanes, 1);
-  if (dtype.code == kDLFloat) {
-    ICHECK_EQ(dtype.bits % 8, 0);
-  } else {
-    // allow uint1 as a special flag for bool.
-    if (dtype.bits == 1 && dtype.code == kDLUInt) return;
-    ICHECK_EQ(dtype.bits % 8, 0);
-  }
-  ICHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
-}
-
-inline size_t GetDataAlignment(const DLTensor& arr) {
-  size_t align = (arr.dtype.bits / 8) * arr.dtype.lanes;
-  if (align < runtime::kAllocAlignment) return runtime::kAllocAlignment;
-  return align;
-}
-
-runtime::NDArray StorageObj::AllocNDArray(uint64_t offset, ShapeTuple shape, 
DLDataType dtype) {
-  VerifyDataType(dtype);
-
-  // critical zone: allocate header, cannot throw
-  runtime::NDArray::Container* container =
-      new runtime::NDArray::Container(nullptr, shape, dtype, 
this->buffer.device);
-
-  container->SetDeleter(StorageObj::Deleter);
-  size_t needed_size = runtime::GetDataSize(container->dl_tensor);
-  this->IncRef();
-  // The manager context pointer must continue to point to the storage object
-  // which owns the backing memory, and keeps track of the reference count.
-  //
-  // When we free a container we extract the storage object, decrement its
-  // reference count, then destroy the container, but leave the underlying
-  // buffer intact.
-  container->manager_ctx = reinterpret_cast<void*>(this);
-
-  // is this UB?
-  // The only change we make w.r.t offset is modifying the data pointer
-  // of the backing tensor to point into the buffer instead of its start.
-  auto offset_ptr = reinterpret_cast<uint8_t*>(this->buffer.data) + offset;
-  container->dl_tensor.data = reinterpret_cast<void*>(offset_ptr);
-
-  runtime::NDArray ret(runtime::GetObjectPtr<Object>(container));
-  // RAII in effect, now run the check.
-
-  ICHECK(offset + needed_size <= this->buffer.size)
-      << "storage allocation failure, attempted to allocate " << needed_size 
<< " at offset "
-      << offset << " in region that is " << this->buffer.size << "bytes";
-
-  return ret;
-}
-
-MemoryManager* MemoryManager::Global() {
-  // NOTE: explicitly use new to avoid exit-time destruction of global state
-  // Global state will be recycled by OS as the process exits.
-  static auto* inst = new MemoryManager();
-  return inst;
-}
-
-Allocator* MemoryManager::GetOrCreateAllocator(Device dev, AllocatorType type) 
{
-  MemoryManager* m = MemoryManager::Global();
-  std::lock_guard<std::mutex> lock(m->mutex_);
-  if (m->allocators_.find(dev) == m->allocators_.end()) {
-    std::unique_ptr<Allocator> alloc;
-    switch (type) {
-      case kNaive: {
-        DLOG(INFO) << "New naive allocator for " << dev;
-        alloc.reset(new NaiveAllocator(dev));
-        break;
-      }
-      case kPooled: {
-        DLOG(INFO) << "New pooled allocator for " << dev;
-        alloc.reset(new PooledAllocator(dev));
-        break;
-      }
-      default:
-        LOG(FATAL) << "Unknown allocator type: " << type;
-    }
-    auto ret = alloc.get();
-    m->allocators_.emplace(dev, std::move(alloc));
-    return ret;
-  }
-  auto alloc = m->allocators_.at(dev).get();
-  if (alloc->type() != type) {
-    LOG(WARNING) << "The type of existing allocator for " << dev
-                 << " is different from the request type (" << alloc->type() 
<< " vs " << type
-                 << ")";
-  }
-  return alloc;
-}
-
-Allocator* MemoryManager::GetAllocator(Device dev) {
-  MemoryManager* m = MemoryManager::Global();
-  std::lock_guard<std::mutex> lock(m->mutex_);
-  auto it = m->allocators_.find(dev);
-  if (it == m->allocators_.end()) {
-    LOG(FATAL) << "Allocator for " << dev << " has not been created yet.";
-  }
-  return it->second.get();
-}
-
-void MemoryManager::Clear() {
-  MemoryManager* m = MemoryManager::Global();
-  std::lock_guard<std::mutex> lock(m->mutex_);
-  m->allocators_.clear();
-}
-
-Buffer Allocator::Alloc(ShapeTuple shape, DLDataType dtype, String mem_scope) {
-  ICHECK_EQ(shape.size(), 1) << "Allocator of type (" << type_
-                             << ") does not support nD allocation. Please use 
allocator type ("
-                             << AllocatorType::kNaive << ")";
-  CHECK_EQ(mem_scope, "global") << "Allocator of type (" << type_
-                                << ") does not support memory scope " << 
mem_scope
-                                << ". Please use allocator type (" << 
AllocatorType::kNaive << ")";
-
-  DLTensor temp;
-  temp.ndim = shape.size();
-  temp.dtype = dtype;
-  temp.shape = const_cast<int64_t*>(shape.data());
-  temp.strides = nullptr;
-  temp.byte_offset = 0;
-  size_t nbytes = GetDataSize(temp);
-
-  return Alloc(nbytes, runtime::kAllocAlignment, dtype);
-}
-
-runtime::NDArray Allocator::Empty(ShapeTuple shape, DLDataType dtype, DLDevice 
dev) {
-  VerifyDataType(dtype);
-  runtime::NDArray::Container* container =
-      new runtime::NDArray::Container(nullptr, shape, dtype, dev);
-  container->SetDeleter(BufferDeleter);
-  size_t size = runtime::GetDataSize(container->dl_tensor);
-  size_t alignment = GetDataAlignment(container->dl_tensor);
-  Buffer* buffer = new Buffer;
-  *buffer = this->Alloc(size, alignment, dtype);
-  container->manager_ctx = reinterpret_cast<void*>(buffer);
-  container->dl_tensor.data = buffer->data;
-  return runtime::NDArray(runtime::GetObjectPtr<Object>(container));
-}
-
-TVM_REGISTER_GLOBAL("vm.builtin.memory_manager.clear").set_body_typed(MemoryManager::Clear);
-
-}  // namespace relax_vm
-}  // namespace runtime
-}  // namespace tvm
diff --git a/src/runtime/relax_vm/naive_allocator.h b/src/runtime/relax_vm/naive_allocator.h
deleted file mode 100644
index dde4a22066..0000000000
--- a/src/runtime/relax_vm/naive_allocator.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file tvm/runtime/relax_vm/naive_allocator.h
- */
-#ifndef TVM_RUNTIME_RELAX_VM_NAIVE_ALLOCATOR_H_
-#define TVM_RUNTIME_RELAX_VM_NAIVE_ALLOCATOR_H_
-
-#include <tvm/runtime/device_api.h>
-#include <tvm/runtime/relax_vm/memory_manager.h>
-
-#include <atomic>
-
-namespace tvm {
-namespace runtime {
-namespace relax_vm {
-
-class NaiveAllocator final : public Allocator {
- public:
-  explicit NaiveAllocator(Device dev) : Allocator(kNaive), used_memory_(0), 
device_(dev) {}
-
-  Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override 
{
-    Buffer buf;
-    buf.device = device_;
-    buf.size = nbytes;
-    buf.data =
-        runtime::DeviceAPI::Get(device_)->AllocDataSpace(device_, nbytes, 
alignment, type_hint);
-    used_memory_.fetch_add(nbytes, std::memory_order_relaxed);
-    DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ 
<< " B";
-    return buf;
-  }
-
-  Buffer Alloc(ShapeTuple shape, DLDataType dtype, String mem_scope) override {
-    DLTensor temp;
-    temp.data = nullptr;
-    temp.device = device_;
-    temp.ndim = shape.size();
-    temp.dtype = dtype;
-    temp.shape = const_cast<int64_t*>(shape.data());
-    temp.strides = nullptr;
-    temp.byte_offset = 0;
-    size_t nbytes = GetDataSize(temp);
-
-    Buffer buf;
-    buf.device = device_;
-    buf.size = nbytes;
-    buf.data = runtime::DeviceAPI::Get(device_)->AllocDataSpace(device_, 
shape.size(), shape.data(),
-                                                                dtype, 
mem_scope);
-    used_memory_.fetch_add(nbytes, std::memory_order_relaxed);
-    DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ 
<< " B";
-    return buf;
-  }
-
-  void Free(const Buffer& buffer) override {
-    runtime::DeviceAPI::Get(device_)->FreeDataSpace(buffer.device, 
buffer.data);
-    used_memory_.fetch_sub(buffer.size, std::memory_order_relaxed);
-    DLOG(INFO) << "free " << buffer.size << " B, used memory " << used_memory_ 
<< " B";
-  }
-
- private:
-  std::atomic<size_t> used_memory_;
-  Device device_;
-};
-
-}  // namespace relax_vm
-}  // namespace runtime
-}  // namespace tvm
-
-#endif  // TVM_RUNTIME_RELAX_VM_NAIVE_ALLOCATOR_H_
diff --git a/src/runtime/relax_vm/pooled_allocator.h b/src/runtime/relax_vm/pooled_allocator.h
deleted file mode 100644
index 0dd7d8b027..0000000000
--- a/src/runtime/relax_vm/pooled_allocator.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file tvm/runtime/relax_vm/pooled_allocator.h
- */
-#ifndef TVM_RUNTIME_RELAX_VM_POOLED_ALLOCATOR_H_
-#define TVM_RUNTIME_RELAX_VM_POOLED_ALLOCATOR_H_
-
-#include <tvm/runtime/device_api.h>
-#include <tvm/runtime/relax_vm/memory_manager.h>
-
-#include <atomic>
-#include <mutex>
-#include <unordered_map>
-#include <vector>
-
-namespace tvm {
-namespace runtime {
-namespace relax_vm {
-
-class PooledAllocator final : public Allocator {
- public:
-  static constexpr size_t kDefaultPageSize = 4096;
-
-  explicit PooledAllocator(Device dev, size_t page_size = kDefaultPageSize)
-      : Allocator(kPooled), page_size_(page_size), used_memory_(0), 
device_(dev) {}
-
-  ~PooledAllocator() { ReleaseAll(); }
-
-  Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override 
{
-    std::lock_guard<std::recursive_mutex> lock(mu_);
-    size_t size = ((nbytes + page_size_ - 1) / page_size_) * page_size_;
-    auto&& it = memory_pool_.find(size);
-    if (it != memory_pool_.end() && !it->second.empty()) {
-      auto&& pool = it->second;
-      auto ret = pool.back();
-      pool.pop_back();
-      return ret;
-    }
-    Buffer buf;
-    buf.device = device_;
-    buf.size = size;
-    try {
-      buf.data =
-          runtime::DeviceAPI::Get(device_)->AllocDataSpace(device_, size, 
alignment, type_hint);
-    } catch (InternalError& err) {
-      LOG(WARNING) << "PooledAllocator got InternalError during allocation: " 
<< err.message();
-      LOG(WARNING) << "Trying to release all unused memory and reallocate...";
-      ReleaseAll();
-      buf.data =
-          runtime::DeviceAPI::Get(device_)->AllocDataSpace(device_, size, 
alignment, type_hint);
-    }
-
-    used_memory_.fetch_add(size, std::memory_order_relaxed);
-    DLOG(INFO) << "allocate " << size << " B, used memory " << used_memory_ << 
" B";
-    return buf;
-  }
-
-  void Free(const Buffer& buffer) override {
-    std::lock_guard<std::recursive_mutex> lock(mu_);
-    if (memory_pool_.find(buffer.size) == memory_pool_.end()) {
-      memory_pool_.emplace(buffer.size, std::vector<Buffer>{});
-    }
-    memory_pool_.at(buffer.size).push_back(buffer);
-    DLOG(INFO) << "reclaim buffer " << buffer.size;
-  }
-
- private:
-  void ReleaseAll() {
-    std::lock_guard<std::recursive_mutex> lock(mu_);
-    for (auto const& it : memory_pool_) {
-      auto const& pool = it.second;
-      for (auto const& buf : pool) {
-        runtime::DeviceAPI::Get(buf.device)->FreeDataSpace(buf.device, 
buf.data);
-      }
-    }
-    memory_pool_.clear();
-    used_memory_ = 0;
-    DLOG(INFO) << "release all buffers";
-  }
-
- private:
-  size_t page_size_;
-  std::atomic<size_t> used_memory_;
-  std::unordered_map<size_t, std::vector<Buffer> > memory_pool_;
-  std::recursive_mutex mu_;
-  Device device_;
-};
-
-}  // namespace relax_vm
-}  // namespace runtime
-}  // namespace tvm
-
-#endif  // TVM_RUNTIME_RELAX_VM_POOLED_ALLOCATOR_H_
diff --git a/tests/python/relax/test_runtime_builtin.py b/tests/python/relax/test_runtime_builtin.py
index e11ee2a22d..0417f99233 100644
--- a/tests/python/relax/test_runtime_builtin.py
+++ b/tests/python/relax/test_runtime_builtin.py
@@ -168,30 +168,6 @@ def test_attention_kv_cache():
         assert res[i][1] == i
 
 
-def test_attention_kv_cache_create_multiple():
-    fcreate = 
tvm.get_global_func("vm.builtin.attention_kv_cache_create_multiple")
-    fappend = tvm.get_global_func("vm.builtin.attention_kv_cache_append")
-    fview = tvm.get_global_func("vm.builtin.attention_kv_cache_view")
-
-    num_caches = 4
-    cache_group = fcreate(
-        tvm.nd.empty((1, 2), dtype="int32"), tvm.runtime.ShapeTuple([7, 2]), 
0, num_caches
-    )
-
-    num_steps = 7
-    for i in range(num_steps):
-        for cache_index in range(num_caches):
-            fappend(
-                cache_group[cache_index],
-                tvm.nd.array(i * cache_index * np.ones((1, 
2)).astype("int32")),
-            )
-            res = fview(cache_group[cache_index], tvm.runtime.ShapeTuple((i + 
1, 2))).numpy()
-            # Also verify that the old values aren't corrupted
-            for j in range(i):
-                assert res[j][0] == j * cache_index
-                assert res[j][1] == j * cache_index
-
-
 def test_ndarray_cache():
     fload = tvm.get_global_func("vm.builtin.ndarray_cache.load")
     fget_params = tvm.get_global_func("vm.builtin.param_array_from_cache")

(tvm) branch unity updated: [Unity] Replace relax_vm/memory_manager with memory/memory_manager (#15882)

Reply via email to