This is an automated email from the ASF dual-hosted git repository.

masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new e2c8d7b33e [VM][OpenCL] Introduce textures allocation to VM memory 
manager (#15400)
e2c8d7b33e is described below

commit e2c8d7b33ea158a6775273431cb09aec776d311e
Author: Egor Churaev <[email protected]>
AuthorDate: Thu Jul 27 04:15:02 2023 +0300

    [VM][OpenCL] Introduce textures allocation to VM memory manager (#15400)
    
    * [VM][OpenCL] Introduce textures allocation to VM memory manager
    
    VM memory manager is extended to support allocation memory with
    different memory scope. This functionality is fully implemented for
    naive allocator. For the pooled allocator it should be implemented in the
    future.
    
    * Fix lint
    
    * Fix OpenCL tests
---
 include/tvm/runtime/vm/memory_manager.h      |  18 ++-
 src/runtime/vm/memory_manager.cc             |  18 +++
 src/runtime/vm/naive_allocator.h             |  26 ++++
 src/runtime/vm/pooled_allocator.h            |  10 ++
 tests/cpp/runtime/vm/memory_manager_tests.cc | 204 +++++++++++++++++++++++++++
 5 files changed, 275 insertions(+), 1 deletion(-)

diff --git a/include/tvm/runtime/vm/memory_manager.h 
b/include/tvm/runtime/vm/memory_manager.h
index fb2354bca4..feafc01f63 100644
--- a/include/tvm/runtime/vm/memory_manager.h
+++ b/include/tvm/runtime/vm/memory_manager.h
@@ -31,6 +31,7 @@
 #include <functional>
 #include <memory>
 #include <mutex>
+#include <string>
 #include <unordered_map>
 #include <vector>
 
@@ -43,6 +44,8 @@ struct Buffer {
   void* data{nullptr};
   /*! \brief The size of the block. */
   size_t size{0};
+  /*! \brief The shape of the tensor. */
+  std::vector<int64_t> shape;
   /*! \brief The context of the allocated buffers. */
   Device device;
 };
@@ -72,6 +75,15 @@ class Allocator {
    *  \return A sized allocation in the form of a buffer.
    */
   virtual Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) 
= 0;
+  /*! \brief Allocate a buffer given a shape and type.
+   *  \param ndims The rank of the tensor.
+   *  \param shape The shape of the tensor.
+   *  \param type_hint A type hint to the allocator.
+   *  \param mem_scope A memory scope of the buffer.
+   *  \return A sized allocation in the form of a buffer.
+   */
+  virtual Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint,
+                       const std::string& mem_scope = "") = 0;
   /*! \brief Free a buffer allocated by the allocator.
    *  \param buffer The buffer to free.
    */
@@ -81,6 +93,10 @@ class Allocator {
    */
   virtual size_t UsedMemory() const = 0;
 
+ protected:
+  virtual Buffer Alloc(Device dev, int ndims, int64_t* shape, DLDataType 
type_hint,
+                       const std::string& mem_scope);
+
  private:
   AllocatorType type_;
 };
@@ -105,7 +121,7 @@ class MemoryManager {
  private:
   MemoryManager() {}
 
- private:
+ protected:
   std::mutex mu_;
   std::unordered_map<Device, std::unique_ptr<Allocator>> allocators_;
 };
diff --git a/src/runtime/vm/memory_manager.cc b/src/runtime/vm/memory_manager.cc
index 22afcce6a0..2855722a4c 100644
--- a/src/runtime/vm/memory_manager.cc
+++ b/src/runtime/vm/memory_manager.cc
@@ -170,6 +170,24 @@ NDArray Allocator::Empty(std::vector<int64_t> shape, 
DLDataType dtype, DLDevice
   return NDArray(GetObjectPtr<Object>(container));
 }
 
+Buffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType 
type_hint,
+                        const std::string& mem_scope) {
+  if (mem_scope.empty() || mem_scope == "global") {
+    // by default, we can always redirect to the flat memory allocations
+    std::vector<int64_t> s;
+    for (int i = 0; i < ndims; ++i) {
+      s.push_back(shape[i]);
+    }
+    NDArray::Container container(nullptr, s, type_hint, dev);
+    size_t size = GetDataSize(container.dl_tensor);
+    size_t alignment = GetDataAlignment(container.dl_tensor);
+    return Alloc(size, alignment, type_hint);
+  }
+  LOG(FATAL) << "Allocator cannot allocate data space with "
+             << "specified memory scope: " << mem_scope;
+  return {};
+}
+
 }  // namespace vm
 }  // namespace runtime
 }  // namespace tvm
diff --git a/src/runtime/vm/naive_allocator.h b/src/runtime/vm/naive_allocator.h
index 9fce66f606..799f16ad60 100644
--- a/src/runtime/vm/naive_allocator.h
+++ b/src/runtime/vm/naive_allocator.h
@@ -27,6 +27,7 @@
 #include <tvm/runtime/vm/memory_manager.h>
 
 #include <atomic>
+#include <string>
 
 namespace tvm {
 namespace runtime {
@@ -46,6 +47,31 @@ class NaiveAllocator final : public Allocator {
     return buf;
   }
 
+  Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint,
+               const std::string& mem_scope) override {
+    Buffer buf;
+    size_t nbytes = 1;
+    for (int i = 0; i < ndims; ++i) {
+      buf.shape.push_back(shape[i]);
+      nbytes *= static_cast<size_t>(shape[i]);
+    }
+    nbytes *= (type_hint.bits * type_hint.lanes + 7) / 8;
+    buf.device = device_;
+    if (mem_scope.empty() || mem_scope == "global") {
+      auto tmp_buf = Allocator::Alloc(device_, ndims, shape, type_hint, 
mem_scope);
+      buf.size = tmp_buf.size;
+      buf.data = tmp_buf.data;
+      return buf;
+    }
+
+    buf.size = nbytes;
+    buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, ndims, shape, 
type_hint,
+                                                       String(mem_scope));
+    used_memory_.fetch_add(nbytes, std::memory_order_relaxed);
+    DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ 
<< " B";
+    return buf;
+  }
+
   void Free(const Buffer& buffer) override {
     DeviceAPI::Get(device_)->FreeDataSpace(buffer.device, buffer.data);
     used_memory_.fetch_sub(buffer.size, std::memory_order_relaxed);
diff --git a/src/runtime/vm/pooled_allocator.h 
b/src/runtime/vm/pooled_allocator.h
index 9c11c78301..ea6059e0c6 100644
--- a/src/runtime/vm/pooled_allocator.h
+++ b/src/runtime/vm/pooled_allocator.h
@@ -28,6 +28,7 @@
 
 #include <atomic>
 #include <mutex>
+#include <string>
 #include <unordered_map>
 #include <vector>
 
@@ -71,6 +72,15 @@ class PooledAllocator final : public Allocator {
     return buf;
   }
 
+  Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint,
+               const std::string& mem_scope) override {
+    if (mem_scope.empty() || mem_scope == "global") {
+      return Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope);
+    }
+    LOG(FATAL) << "This alloc should be implemented";
+    return {};
+  }
+
   void Free(const Buffer& buffer) override {
     std::lock_guard<std::recursive_mutex> lock(mu_);
     if (memory_pool_.find(buffer.size) == memory_pool_.end()) {
diff --git a/tests/cpp/runtime/vm/memory_manager_tests.cc 
b/tests/cpp/runtime/vm/memory_manager_tests.cc
new file mode 100644
index 0000000000..ac1ff201cf
--- /dev/null
+++ b/tests/cpp/runtime/vm/memory_manager_tests.cc
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include <tvm/runtime/vm/memory_manager.h>
+
+#include <exception>
+
+#include "../../../../src/runtime/vm/pooled_allocator.h"
+
+namespace tvm {
+namespace runtime {
+namespace vm {
+
+// MemoryManagerWrapper is necessary because in class MemoryManager we don't 
have access to its
+// protected members. In this class we add a new method which allows us to 
clear the internal state of
+// the global memory manager.
+class MemoryManagerWrapper : public MemoryManager {
+ public:
+  static MemoryManagerWrapper* Global() {
+    return reinterpret_cast<MemoryManagerWrapper*>(MemoryManager::Global());
+  }
+  void clear() { allocators_.clear(); }
+};
+
+class TvmVMMemoryManagerTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Clear allocators from previous tests
+    MemoryManagerWrapper::Global()->clear();
+  }
+};
+
+TEST_F(TvmVMMemoryManagerTest, NaiveAllocBasic) {
+  Device dev = {kDLCPU, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, 
kNaive);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto buff = allocator->Alloc(64, 32, DataType::Float(32));
+  EXPECT_EQ(allocator->UsedMemory(), 64);
+  allocator->Free(buff);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+}
+
+TEST_F(TvmVMMemoryManagerTest, PooledAllocBasic) {
+  Device dev = {kDLCPU, 0};
+  size_t nbytes = 64;
+  size_t page_size = PooledAllocator::kDefaultPageSize;
+  size_t size = ((nbytes + page_size - 1) / page_size) * page_size;
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, 
kPooled);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto buff = allocator->Alloc(nbytes, 32, DataType::Float(32));
+  EXPECT_EQ(allocator->UsedMemory(), size);
+  allocator->Free(buff);
+  EXPECT_EQ(allocator->UsedMemory(), size);
+}
+
+TEST_F(TvmVMMemoryManagerTest, NaiveEmptyBasic) {
+  Device dev = {kDLCPU, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, 
kNaive);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto dt = DataType::Float(32);
+  size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes();
+  std::vector<int64_t> shape = {1, 3, 6, 6};
+  {
+    auto ndarray = allocator->Empty(shape, dt, dev);
+    EXPECT_EQ(allocator->UsedMemory(), nbytes);
+  }
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+}
+
+TEST_F(TvmVMMemoryManagerTest, PooledEmptyBasic) {
+  Device dev = {kDLCPU, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, 
kPooled);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto dt = DataType::Float(32);
+  size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes();
+  size_t page_size = PooledAllocator::kDefaultPageSize;
+  size_t size = ((nbytes + page_size - 1) / page_size) * page_size;
+  std::vector<int64_t> shape = {1, 3, 6, 6};
+  {
+    auto ndarray = allocator->Empty(shape, dt, dev);
+    EXPECT_EQ(allocator->UsedMemory(), size);
+  }
+  EXPECT_EQ(allocator->UsedMemory(), size);
+}
+
+TEST_F(TvmVMMemoryManagerTest, NaiveAllocWithShape) {
+  Device dev = {kDLCPU, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, 
kNaive);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto dt = DataType::Float(32);
+  size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes();
+  std::vector<int64_t> shape = {1, 3, 6, 6};
+  auto buff = allocator->Alloc(shape.size(), shape.data(), dt);
+  EXPECT_EQ(allocator->UsedMemory(), nbytes);
+  allocator->Free(buff);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+
+  try {
+    auto texture = allocator->Alloc(shape.size(), shape.data(), dt, 
"global.texture");
+    FAIL();
+  } catch (std::exception& e) {
+    std::string pattern =
+        "Device does not support allocate data space with specified memory 
scope: global.texture";
+    std::string what = e.what();
+    EXPECT_NE(what.find(pattern), std::string::npos) << what;
+  }
+}
+
+TEST_F(TvmVMMemoryManagerTest, PooledAllocWithShape) {
+  Device dev = {kDLCPU, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, 
kPooled);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto dt = DataType::Float(32);
+  size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes();
+  size_t page_size = PooledAllocator::kDefaultPageSize;
+  size_t size = ((nbytes + page_size - 1) / page_size) * page_size;
+  std::vector<int64_t> shape = {1, 3, 6, 6};
+  auto buff = allocator->Alloc(shape.size(), shape.data(), dt);
+  EXPECT_EQ(allocator->UsedMemory(), size);
+  allocator->Free(buff);
+  EXPECT_EQ(allocator->UsedMemory(), size);
+
+  try {
+    auto texture = allocator->Alloc(shape.size(), shape.data(), dt, 
"global.texture");
+    FAIL();
+  } catch (std::exception& e) {
+    std::string pattern = "This alloc should be implemented";
+    std::string what = e.what();
+    EXPECT_NE(what.find(pattern), std::string::npos) << what;
+  }
+}
+
+TEST_F(TvmVMMemoryManagerTest, NaiveAllocOpenCLTexture) {
+  bool enabled = tvm::runtime::RuntimeEnabled("opencl");
+  if (!enabled) {
+    LOG(INFO) << "Skip OpenCL Texture alloc test because opencl runtime is 
disabled.\n";
+    return;
+  }
+  Device dev = {kDLOpenCL, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, 
kNaive);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto dt = DataType::Float(32);
+  size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes();
+  std::vector<int64_t> shape = {1, 3, 6, 6};
+  auto buff = allocator->Alloc(shape.size(), shape.data(), dt);
+  EXPECT_EQ(allocator->UsedMemory(), nbytes);
+  allocator->Free(buff);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+
+  auto texture = allocator->Alloc(shape.size(), shape.data(), dt, 
"global.texture");
+  EXPECT_EQ(allocator->UsedMemory(), nbytes);
+  allocator->Free(texture);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+}
+
+TEST_F(TvmVMMemoryManagerTest, PooledAllocOpenCLTexture) {
+  bool enabled = tvm::runtime::RuntimeEnabled("opencl");
+  if (!enabled) {
+    LOG(INFO) << "Skip OpenCL Texture alloc test because opencl runtime is 
disabled.\n";
+    return;
+  }
+  Device dev = {kDLOpenCL, 0};
+  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, 
kPooled);
+  EXPECT_EQ(allocator->UsedMemory(), 0);
+  auto dt = DataType::Float(32);
+  size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes();
+  size_t page_size = PooledAllocator::kDefaultPageSize;
+  size_t size = ((nbytes + page_size - 1) / page_size) * page_size;
+  std::vector<int64_t> shape = {1, 3, 6, 6};
+  auto buff = allocator->Alloc(shape.size(), shape.data(), dt);
+  EXPECT_EQ(allocator->UsedMemory(), size);
+  allocator->Free(buff);
+  EXPECT_EQ(allocator->UsedMemory(), size);
+
+  try {
+    auto texture = allocator->Alloc(shape.size(), shape.data(), dt, 
"global.texture");
+    FAIL();
+  } catch (std::exception& e) {
+    std::string pattern = "This alloc should be implemented";
+    std::string what = e.what();
+    EXPECT_NE(what.find(pattern), std::string::npos) << what;
+  }
+}
+}  // namespace vm
+}  // namespace runtime
+}  // namespace tvm

Reply via email to