This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/unity by this push:
new a9c81a7cc1 [Unity] Replace relax_vm/memory_manager with memory/memory_manager (#15882)
a9c81a7cc1 is described below
commit a9c81a7cc114fd98065223a4d5be1952b06196b2
Author: Yong Wu <[email protected]>
AuthorDate: Tue Oct 31 10:37:25 2023 -0700
[Unity] Replace relax_vm/memory_manager with memory/memory_manager (#15882)
---
include/tvm/runtime/relax_vm/memory_manager.h | 152 ------------------
include/tvm/runtime/relax_vm/vm.h | 9 +-
src/runtime/disco/builtin.cc | 4 +-
src/runtime/relax_vm/builtin.cc | 2 +-
src/runtime/relax_vm/lm_support.cc | 46 +-----
src/runtime/relax_vm/memory_manager.cc | 214 --------------------------
src/runtime/relax_vm/naive_allocator.h | 86 -----------
src/runtime/relax_vm/pooled_allocator.h | 111 -------------
tests/python/relax/test_runtime_builtin.py | 24 ---
9 files changed, 12 insertions(+), 636 deletions(-)
diff --git a/include/tvm/runtime/relax_vm/memory_manager.h b/include/tvm/runtime/relax_vm/memory_manager.h
deleted file mode 100644
index ed939fb88f..0000000000
--- a/include/tvm/runtime/relax_vm/memory_manager.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file tvm/runtime/relax_vm/memory_manager.h
- * \brief Abstract device memory management API
- */
-#ifndef TVM_RUNTIME_RELAX_VM_MEMORY_MANAGER_H_
-#define TVM_RUNTIME_RELAX_VM_MEMORY_MANAGER_H_
-
-#include <tvm/runtime/c_runtime_api.h>
-#include <tvm/runtime/ndarray.h>
-
-#include <functional>
-#include <memory>
-#include <mutex>
-#include <unordered_map>
-#include <vector>
-
-namespace tvm {
-namespace runtime {
-namespace relax_vm {
-
-struct Buffer {
- /*! \brief The pointer to the allocated block of memory. */
- void* data{nullptr};
- /*! \brief The size of the block. */
- size_t size{0};
- /*! \brief The device of the allocated buffers. */
- Device device;
-};
-
-enum AllocatorType {
- kNaive = 1,
- kPooled,
-};
-
-class Allocator {
- public:
- explicit Allocator(AllocatorType type) : type_(type) {}
- virtual ~Allocator() = default;
- /*! \brief Allocate an empty NDArray using from the allocator.
- * \param shape The shape of the NDArray.
- * \param dtype The datatype of the NDArray.
- * \param dev The device where the array is allocated.
- * \return The empty NDArray.
- */
- runtime::NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev);
- /*! \brief Return the allocator type. */
- inline AllocatorType type() const { return type_; }
- /*! \brief Allocate a buffer given a size, alignment and type.
- * \param nbytes The size of the buffer.
- * \param alignment The alignment of the buffer.
- * \param type_hint A type hint to the allocator.
- * \return A sized allocation in the form of a buffer.
- */
- virtual Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) = 0;
- /*! \brief Allocate a buffer given a size, alignment and type.
- * \param shape The shape of allocated tensor.
- * \param dtype A type hint to the allocator.
- * \param mem_scope The memory scope of allocated tensor.
- * \return A sized allocation in the form of a buffer.
- */
- virtual Buffer Alloc(ShapeTuple shape, DLDataType dtype, String mem_scope);
- /*! \brief Free a buffer allocated by the allocator.
- * \param buffer The buffer to free.
- */
- virtual void Free(const Buffer& buffer) = 0;
-
- private:
- AllocatorType type_;
-};
-
-class MemoryManager {
- public:
- static MemoryManager* Global();
- /*!
- * \brief Get or create an allocator given the device and allocator type.
- * \param dev The TVM device
- * \param type The allocator type
- * \return The memory allocator.
- */
- static Allocator* GetOrCreateAllocator(Device dev, AllocatorType type);
- /*!
- * \brief Get an allocator given the device.
- * \param dev The TVM device
- * \return The memory allocator.
- */
- static Allocator* GetAllocator(Device dev);
-
- /*! \brief Clear the allocators. */
- static void Clear();
-
- private:
- MemoryManager() {}
-
- private:
- std::mutex mutex_;
- std::unordered_map<Device, std::unique_ptr<Allocator>> allocators_;
-};
-
-/*! \brief An object representing a storage allocation. */
-class StorageObj : public Object {
- public:
- /*! \brief The index into the VM function table. */
- Buffer buffer;
-
- /*! \brief Allocate an NDArray from a given piece of storage. */
- runtime::NDArray AllocNDArray(uint64_t offset, ShapeTuple shape, DLDataType dtype);
-
- /*! \brief The deleter for an NDArray when allocated from underlying storage. */
- static void Deleter(Object* ptr);
-
- ~StorageObj() {
- auto alloc = MemoryManager::Global()->GetAllocator(buffer.device);
- alloc->Free(buffer);
- }
-
- static constexpr const uint32_t _type_index = runtime::TypeIndex::kDynamic;
- static constexpr const char* _type_key = "relax.Storage";
- TVM_DECLARE_FINAL_OBJECT_INFO(StorageObj, Object);
-};
-
-/*! \brief reference to storage. */
-class Storage : public ObjectRef {
- public:
- explicit Storage(Buffer buffer);
-
- TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Storage, ObjectRef, StorageObj);
-};
-
-} // namespace relax_vm
-} // namespace runtime
-} // namespace tvm
-
-#endif // TVM_RUNTIME_RELAX_VM_MEMORY_MANAGER_H_
diff --git a/include/tvm/runtime/relax_vm/vm.h b/include/tvm/runtime/relax_vm/vm.h
index 95a2080159..4e1f1361be 100644
--- a/include/tvm/runtime/relax_vm/vm.h
+++ b/include/tvm/runtime/relax_vm/vm.h
@@ -31,12 +31,19 @@
#include <string>
#include <vector>
+#include "../memory/memory_manager.h"
#include "./bytecode.h"
#include "./executable.h"
-#include "./memory_manager.h"
namespace tvm {
namespace runtime {
+
+using memory::Allocator;
+using memory::AllocatorType;
+using memory::MemoryManager;
+using memory::Storage;
+using memory::StorageObj;
+
namespace relax_vm {
/*!
diff --git a/src/runtime/disco/builtin.cc b/src/runtime/disco/builtin.cc
index 5aea39cf66..514d633fa6 100644
--- a/src/runtime/disco/builtin.cc
+++ b/src/runtime/disco/builtin.cc
@@ -62,8 +62,8 @@ Module LoadVMModule(std::string path, Device device) {
<< "ValueError: File `" << path
<< "` is not built by RelaxVM, because `vm_initialization` does not exist";
vm_initialization(static_cast<int>(device.device_type), static_cast<int>(device.device_id),
- static_cast<int>(relax_vm::AllocatorType::kPooled), static_cast<int>(kDLCPU), 0,
- static_cast<int>(relax_vm::AllocatorType::kPooled));
+ static_cast<int>(AllocatorType::kPooled), static_cast<int>(kDLCPU), 0,
+ static_cast<int>(AllocatorType::kPooled));
return mod;
}
diff --git a/src/runtime/relax_vm/builtin.cc b/src/runtime/relax_vm/builtin.cc
index a764c34cfa..4cd711dea0 100644
--- a/src/runtime/relax_vm/builtin.cc
+++ b/src/runtime/relax_vm/builtin.cc
@@ -24,12 +24,12 @@
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/logging.h>
#include <tvm/runtime/memory.h>
+#include <tvm/runtime/memory/memory_manager.h>
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>
#include <tvm/runtime/relax_vm/builtin.h>
#include <tvm/runtime/relax_vm/bytecode.h>
-#include <tvm/runtime/relax_vm/memory_manager.h>
#include <tvm/runtime/relax_vm/vm.h>
#include "../runtime_base.h"
diff --git a/src/runtime/relax_vm/lm_support.cc b/src/runtime/relax_vm/lm_support.cc
index 5ff04ebf2a..e56a03fdea 100644
--- a/src/runtime/relax_vm/lm_support.cc
+++ b/src/runtime/relax_vm/lm_support.cc
@@ -39,8 +39,8 @@
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/logging.h>
#include <tvm/runtime/memory.h>
+#include <tvm/runtime/memory/memory_manager.h>
#include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/relax_vm/memory_manager.h>
#include <tvm/runtime/relax_vm/vm.h>
#include <cmath>
@@ -245,56 +245,12 @@ class AttentionKVCache : public ObjectRef {
TVM_REGISTER_OBJECT_TYPE(AttentionKVCacheObj);
-/*!
- * \brief Create multiple kv caches with same shape, from single memory allocation.
- * \param init_data The initial data to put into the cache. Ignored if init_fill_count is
- * less than 0.
- * \param reserve_shape The shape of cache.
- * \param init_fill_count The initial row to fill into
- * the cache.
- * \param num_caches Number of caches to create.
- */
-Array<AttentionKVCache> CreateMultipleKVCaches(NDArray init_data, ShapeTuple reserve_shape,
- int init_fill_count, int num_caches) {
- DLDataType dtype = init_data->dtype;
-
- int64_t cache_size = (dtype.bits * dtype.lanes + 7) / 8;
- for (const auto dim : reserve_shape) {
- cache_size *= dim;
- }
-
- // Add padding to make each cache align to kAllocAlignment
- using tvm::runtime::kAllocAlignment;
- int64_t padding = (kAllocAlignment - cache_size % kAllocAlignment) % kAllocAlignment;
- int64_t cache_offset = cache_size + padding;
-
- Storage storage =
- Storage(MemoryManager::GetOrCreateAllocator(init_data->device, AllocatorType::kNaive)
- ->Alloc(cache_offset * num_caches, kAllocAlignment, dtype));
-
- Array<AttentionKVCache> result;
- for (int i = 0; i < num_caches; ++i) {
- auto c = make_object<AttentionKVCacheObj>();
- c->data = storage->AllocNDArray(i * cache_offset, reserve_shape, dtype);
- c->fill_count = 0;
- if (init_fill_count > 0) {
- c->Append(init_data);
- c->fill_count = init_fill_count;
- }
- result.push_back(AttentionKVCache(c));
- }
- return result;
-}
-
//-------------------------------------------------
// Register runtime functions
//-------------------------------------------------
TVM_REGISTER_GLOBAL("vm.builtin.attention_kv_cache_create")
.set_body_typed(AttentionKVCache::Create);
-TVM_REGISTER_GLOBAL("vm.builtin.attention_kv_cache_create_multiple")
- .set_body_typed(CreateMultipleKVCaches);
-
AttentionKVCache AttentionKVCacheUpdate(AttentionKVCache cache, NDArray value) {
cache->Update(value);
return cache;
diff --git a/src/runtime/relax_vm/memory_manager.cc b/src/runtime/relax_vm/memory_manager.cc
deleted file mode 100644
index 66aaf473ea..0000000000
--- a/src/runtime/relax_vm/memory_manager.cc
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file tvm/runtime/relax_vm/memory_manager.cc
- * \brief Allocate and manage memory for the Relay VM.
- */
-#include <tvm/runtime/device_api.h>
-#include <tvm/runtime/memory.h>
-#include <tvm/runtime/registry.h>
-#include <tvm/runtime/relax_vm/memory_manager.h>
-
-#include <memory>
-#include <utility>
-
-#include "naive_allocator.h"
-#include "pooled_allocator.h"
-
-namespace tvm {
-namespace runtime {
-namespace relax_vm {
-
-static void BufferDeleter(Object* obj) {
- auto* ptr = static_cast<runtime::NDArray::Container*>(obj);
- ICHECK(ptr->manager_ctx != nullptr);
- Buffer* buffer = reinterpret_cast<Buffer*>(ptr->manager_ctx);
- MemoryManager::GetAllocator(buffer->device)->Free(*(buffer));
- delete buffer;
- delete ptr;
-}
-
-void StorageObj::Deleter(Object* obj) {
- auto* ptr = static_cast<runtime::NDArray::Container*>(obj);
- // When invoking AllocNDArray we don't own the underlying allocation
- // and should not delete the buffer, but instead let it be reclaimed
- // by the storage object's destructor.
- //
- // We did bump the reference count by 1 to keep alive the StorageObj
- // allocation in case this NDArray is the sole owner.
- //
- // We decrement the object allowing for the buffer to release our
- // reference count from allocation.
- StorageObj* storage = reinterpret_cast<StorageObj*>(ptr->manager_ctx);
- storage->DecRef();
- delete ptr;
-}
-
-Storage::Storage(Buffer buffer) {
- auto n = make_object<StorageObj>();
- n->buffer = std::move(buffer);
- data_ = std::move(n);
-}
-
-inline void VerifyDataType(DLDataType dtype) {
- ICHECK_GE(dtype.lanes, 1);
- if (dtype.code == kDLFloat) {
- ICHECK_EQ(dtype.bits % 8, 0);
- } else {
- // allow uint1 as a special flag for bool.
- if (dtype.bits == 1 && dtype.code == kDLUInt) return;
- ICHECK_EQ(dtype.bits % 8, 0);
- }
- ICHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
-}
-
-inline size_t GetDataAlignment(const DLTensor& arr) {
- size_t align = (arr.dtype.bits / 8) * arr.dtype.lanes;
- if (align < runtime::kAllocAlignment) return runtime::kAllocAlignment;
- return align;
-}
-
-runtime::NDArray StorageObj::AllocNDArray(uint64_t offset, ShapeTuple shape, DLDataType dtype) {
- VerifyDataType(dtype);
-
- // critical zone: allocate header, cannot throw
- runtime::NDArray::Container* container =
- new runtime::NDArray::Container(nullptr, shape, dtype, this->buffer.device);
-
- container->SetDeleter(StorageObj::Deleter);
- size_t needed_size = runtime::GetDataSize(container->dl_tensor);
- this->IncRef();
- // The manager context pointer must continue to point to the storage object
- // which owns the backing memory, and keeps track of the reference count.
- //
- // When we free a container we extract the storage object, decrement its
- // reference count, then destroy the container, but leave the underlying
- // buffer intact.
- container->manager_ctx = reinterpret_cast<void*>(this);
-
- // is this UB?
- // The only change we make w.r.t offset is modifying the data pointer
- // of the backing tensor to point into the buffer instead of its start.
- auto offset_ptr = reinterpret_cast<uint8_t*>(this->buffer.data) + offset;
- container->dl_tensor.data = reinterpret_cast<void*>(offset_ptr);
-
- runtime::NDArray ret(runtime::GetObjectPtr<Object>(container));
- // RAII in effect, now run the check.
-
- ICHECK(offset + needed_size <= this->buffer.size)
- << "storage allocation failure, attempted to allocate " << needed_size << " at offset "
- << offset << " in region that is " << this->buffer.size << "bytes";
-
- return ret;
-}
-
-MemoryManager* MemoryManager::Global() {
- // NOTE: explicitly use new to avoid exit-time destruction of global state
- // Global state will be recycled by OS as the process exits.
- static auto* inst = new MemoryManager();
- return inst;
-}
-
-Allocator* MemoryManager::GetOrCreateAllocator(Device dev, AllocatorType type) {
- MemoryManager* m = MemoryManager::Global();
- std::lock_guard<std::mutex> lock(m->mutex_);
- if (m->allocators_.find(dev) == m->allocators_.end()) {
- std::unique_ptr<Allocator> alloc;
- switch (type) {
- case kNaive: {
- DLOG(INFO) << "New naive allocator for " << dev;
- alloc.reset(new NaiveAllocator(dev));
- break;
- }
- case kPooled: {
- DLOG(INFO) << "New pooled allocator for " << dev;
- alloc.reset(new PooledAllocator(dev));
- break;
- }
- default:
- LOG(FATAL) << "Unknown allocator type: " << type;
- }
- auto ret = alloc.get();
- m->allocators_.emplace(dev, std::move(alloc));
- return ret;
- }
- auto alloc = m->allocators_.at(dev).get();
- if (alloc->type() != type) {
- LOG(WARNING) << "The type of existing allocator for " << dev
- << " is different from the request type (" << alloc->type() << " vs " << type
- << ")";
- }
- return alloc;
-}
-
-Allocator* MemoryManager::GetAllocator(Device dev) {
- MemoryManager* m = MemoryManager::Global();
- std::lock_guard<std::mutex> lock(m->mutex_);
- auto it = m->allocators_.find(dev);
- if (it == m->allocators_.end()) {
- LOG(FATAL) << "Allocator for " << dev << " has not been created yet.";
- }
- return it->second.get();
-}
-
-void MemoryManager::Clear() {
- MemoryManager* m = MemoryManager::Global();
- std::lock_guard<std::mutex> lock(m->mutex_);
- m->allocators_.clear();
-}
-
-Buffer Allocator::Alloc(ShapeTuple shape, DLDataType dtype, String mem_scope) {
- ICHECK_EQ(shape.size(), 1) << "Allocator of type (" << type_
- << ") does not support nD allocation. Please use allocator type ("
- << AllocatorType::kNaive << ")";
- CHECK_EQ(mem_scope, "global") << "Allocator of type (" << type_
- << ") does not support memory scope " << mem_scope
- << ". Please use allocator type (" << AllocatorType::kNaive << ")";
-
- DLTensor temp;
- temp.ndim = shape.size();
- temp.dtype = dtype;
- temp.shape = const_cast<int64_t*>(shape.data());
- temp.strides = nullptr;
- temp.byte_offset = 0;
- size_t nbytes = GetDataSize(temp);
-
- return Alloc(nbytes, runtime::kAllocAlignment, dtype);
-}
-
-runtime::NDArray Allocator::Empty(ShapeTuple shape, DLDataType dtype, DLDevice dev) {
- VerifyDataType(dtype);
- runtime::NDArray::Container* container =
- new runtime::NDArray::Container(nullptr, shape, dtype, dev);
- container->SetDeleter(BufferDeleter);
- size_t size = runtime::GetDataSize(container->dl_tensor);
- size_t alignment = GetDataAlignment(container->dl_tensor);
- Buffer* buffer = new Buffer;
- *buffer = this->Alloc(size, alignment, dtype);
- container->manager_ctx = reinterpret_cast<void*>(buffer);
- container->dl_tensor.data = buffer->data;
- return runtime::NDArray(runtime::GetObjectPtr<Object>(container));
-}
-
-TVM_REGISTER_GLOBAL("vm.builtin.memory_manager.clear").set_body_typed(MemoryManager::Clear);
-
-} // namespace relax_vm
-} // namespace runtime
-} // namespace tvm
diff --git a/src/runtime/relax_vm/naive_allocator.h b/src/runtime/relax_vm/naive_allocator.h
deleted file mode 100644
index dde4a22066..0000000000
--- a/src/runtime/relax_vm/naive_allocator.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file tvm/runtime/relax_vm/naive_allocator.h
- */
-#ifndef TVM_RUNTIME_RELAX_VM_NAIVE_ALLOCATOR_H_
-#define TVM_RUNTIME_RELAX_VM_NAIVE_ALLOCATOR_H_
-
-#include <tvm/runtime/device_api.h>
-#include <tvm/runtime/relax_vm/memory_manager.h>
-
-#include <atomic>
-
-namespace tvm {
-namespace runtime {
-namespace relax_vm {
-
-class NaiveAllocator final : public Allocator {
- public:
- explicit NaiveAllocator(Device dev) : Allocator(kNaive), used_memory_(0), device_(dev) {}
-
- Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override {
- Buffer buf;
- buf.device = device_;
- buf.size = nbytes;
- buf.data =
- runtime::DeviceAPI::Get(device_)->AllocDataSpace(device_, nbytes, alignment, type_hint);
- used_memory_.fetch_add(nbytes, std::memory_order_relaxed);
- DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ << " B";
- return buf;
- }
-
- Buffer Alloc(ShapeTuple shape, DLDataType dtype, String mem_scope) override {
- DLTensor temp;
- temp.data = nullptr;
- temp.device = device_;
- temp.ndim = shape.size();
- temp.dtype = dtype;
- temp.shape = const_cast<int64_t*>(shape.data());
- temp.strides = nullptr;
- temp.byte_offset = 0;
- size_t nbytes = GetDataSize(temp);
-
- Buffer buf;
- buf.device = device_;
- buf.size = nbytes;
- buf.data = runtime::DeviceAPI::Get(device_)->AllocDataSpace(device_, shape.size(), shape.data(),
- dtype, mem_scope);
- used_memory_.fetch_add(nbytes, std::memory_order_relaxed);
- DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ << " B";
- return buf;
- }
-
- void Free(const Buffer& buffer) override {
- runtime::DeviceAPI::Get(device_)->FreeDataSpace(buffer.device, buffer.data);
- used_memory_.fetch_sub(buffer.size, std::memory_order_relaxed);
- DLOG(INFO) << "free " << buffer.size << " B, used memory " << used_memory_ << " B";
- }
-
- private:
- std::atomic<size_t> used_memory_;
- Device device_;
-};
-
-} // namespace relax_vm
-} // namespace runtime
-} // namespace tvm
-
-#endif // TVM_RUNTIME_RELAX_VM_NAIVE_ALLOCATOR_H_
diff --git a/src/runtime/relax_vm/pooled_allocator.h b/src/runtime/relax_vm/pooled_allocator.h
deleted file mode 100644
index 0dd7d8b027..0000000000
--- a/src/runtime/relax_vm/pooled_allocator.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file tvm/runtime/relax_vm/pooled_allocator.h
- */
-#ifndef TVM_RUNTIME_RELAX_VM_POOLED_ALLOCATOR_H_
-#define TVM_RUNTIME_RELAX_VM_POOLED_ALLOCATOR_H_
-
-#include <tvm/runtime/device_api.h>
-#include <tvm/runtime/relax_vm/memory_manager.h>
-
-#include <atomic>
-#include <mutex>
-#include <unordered_map>
-#include <vector>
-
-namespace tvm {
-namespace runtime {
-namespace relax_vm {
-
-class PooledAllocator final : public Allocator {
- public:
- static constexpr size_t kDefaultPageSize = 4096;
-
- explicit PooledAllocator(Device dev, size_t page_size = kDefaultPageSize)
- : Allocator(kPooled), page_size_(page_size), used_memory_(0), device_(dev) {}
-
- ~PooledAllocator() { ReleaseAll(); }
-
- Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override {
- std::lock_guard<std::recursive_mutex> lock(mu_);
- size_t size = ((nbytes + page_size_ - 1) / page_size_) * page_size_;
- auto&& it = memory_pool_.find(size);
- if (it != memory_pool_.end() && !it->second.empty()) {
- auto&& pool = it->second;
- auto ret = pool.back();
- pool.pop_back();
- return ret;
- }
- Buffer buf;
- buf.device = device_;
- buf.size = size;
- try {
- buf.data =
- runtime::DeviceAPI::Get(device_)->AllocDataSpace(device_, size, alignment, type_hint);
- } catch (InternalError& err) {
- LOG(WARNING) << "PooledAllocator got InternalError during allocation: " << err.message();
- LOG(WARNING) << "Trying to release all unused memory and reallocate...";
- ReleaseAll();
- buf.data =
- runtime::DeviceAPI::Get(device_)->AllocDataSpace(device_, size, alignment, type_hint);
- }
-
- used_memory_.fetch_add(size, std::memory_order_relaxed);
- DLOG(INFO) << "allocate " << size << " B, used memory " << used_memory_ << " B";
- return buf;
- }
-
- void Free(const Buffer& buffer) override {
- std::lock_guard<std::recursive_mutex> lock(mu_);
- if (memory_pool_.find(buffer.size) == memory_pool_.end()) {
- memory_pool_.emplace(buffer.size, std::vector<Buffer>{});
- }
- memory_pool_.at(buffer.size).push_back(buffer);
- DLOG(INFO) << "reclaim buffer " << buffer.size;
- }
-
- private:
- void ReleaseAll() {
- std::lock_guard<std::recursive_mutex> lock(mu_);
- for (auto const& it : memory_pool_) {
- auto const& pool = it.second;
- for (auto const& buf : pool) {
- runtime::DeviceAPI::Get(buf.device)->FreeDataSpace(buf.device, buf.data);
- }
- }
- memory_pool_.clear();
- used_memory_ = 0;
- DLOG(INFO) << "release all buffers";
- }
-
- private:
- size_t page_size_;
- std::atomic<size_t> used_memory_;
- std::unordered_map<size_t, std::vector<Buffer> > memory_pool_;
- std::recursive_mutex mu_;
- Device device_;
-};
-
-} // namespace relax_vm
-} // namespace runtime
-} // namespace tvm
-
-#endif // TVM_RUNTIME_RELAX_VM_POOLED_ALLOCATOR_H_
diff --git a/tests/python/relax/test_runtime_builtin.py b/tests/python/relax/test_runtime_builtin.py
index e11ee2a22d..0417f99233 100644
--- a/tests/python/relax/test_runtime_builtin.py
+++ b/tests/python/relax/test_runtime_builtin.py
@@ -168,30 +168,6 @@ def test_attention_kv_cache():
assert res[i][1] == i
-def test_attention_kv_cache_create_multiple():
- fcreate = tvm.get_global_func("vm.builtin.attention_kv_cache_create_multiple")
- fappend = tvm.get_global_func("vm.builtin.attention_kv_cache_append")
- fview = tvm.get_global_func("vm.builtin.attention_kv_cache_view")
-
- num_caches = 4
- cache_group = fcreate(
- tvm.nd.empty((1, 2), dtype="int32"), tvm.runtime.ShapeTuple([7, 2]), 0, num_caches
- )
-
- num_steps = 7
- for i in range(num_steps):
- for cache_index in range(num_caches):
- fappend(
- cache_group[cache_index],
- tvm.nd.array(i * cache_index * np.ones((1, 2)).astype("int32")),
- )
- res = fview(cache_group[cache_index], tvm.runtime.ShapeTuple((i + 1, 2))).numpy()
- # Also verify that the old values aren't corrupted
- for j in range(i):
- assert res[j][0] == j * cache_index
- assert res[j][1] == j * cache_index
-
-
def test_ndarray_cache():
fload = tvm.get_global_func("vm.builtin.ndarray_cache.load")
fget_params = tvm.get_global_func("vm.builtin.param_array_from_cache")