[GitHub] [tvm] echuraev commented on a diff in pull request #15058: [OpenCL][Texture] Improved texture memory planning

via GitHub Wed, 02 Aug 2023 13:30:02 -0700


echuraev commented on code in PR #15058:
URL: https://github.com/apache/tvm/pull/15058#discussion_r1282310929



##########
src/runtime/c_runtime_api.cc:
##########
@@ -171,6 +171,25 @@ void* DeviceAPI::AllocDataSpace(Device dev, int ndim, 
const int64_t* shape, DLDa
   return nullptr;
 }
 
+void* DeviceAPI::AllocDataSpaceView(Device dev, void* data, int ndim, const 
int64_t* shape,
+                                    DLDataType dtype, Optional<String> 
mem_scope) {
+  return data;
+}
+
+size_t DeviceAPI::GetDataSize(const DLTensor& arr, Optional<String> mem_scope) 
{
+  if (!mem_scope.defined() || mem_scope.value() == "global") {

Review Comment:
   ```suggestion
     if (!mem_scope.defined() || mem_scope.value().empty() || mem_scope.value() 
== "global") {
   ```



##########
src/runtime/opencl/opencl_device_api.cc:
##########
@@ -222,8 +227,61 @@ void* 
OpenCLWorkspace::CreateHostPtrIfEnabled(cl::BufferDescriptor* desc, Device
 void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t 
alignment,
                                       DLDataType type_hint) {
   this->Init();
+  return AllocCLBuffer(dev, size, alignment, type_hint);
+}
+
+void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t width, size_t height, 
DLDataType type_hint,
+                                      Optional<String> mem_scope) {
+  // Texture allocation given width and height
+  cl_uint row_align = GetImageAlignment(dev.device_id);
+  size_t pixel_size = (type_hint.bits * type_hint.lanes + 7) / 8;
+  size_t row_pitch = ALIGN_UP(width * pixel_size * 4, row_align);  // CL_RGBA 
= 4
+  size_t mem_size = row_pitch * height;
+
+  // Alloc back buffer from pool
+  cl::BufferDescriptor* back_buffer = nullptr;
+  if (IsBufferToImageSupported(dev.device_id)) {
+    auto buf = MemoryManager::GetOrCreateAllocator(dev, AllocatorType::kPooled)
+                   ->Alloc(mem_size, kTempAllocaAlignment, type_hint);
+    back_buffer = static_cast<cl::BufferDescriptor*>(buf.data);
+    back_buffer->mbuf = buf;
+  }
+
+  if (!mem_scope.defined()) {
+    mem_scope = Optional<String>("global.texture");
+  }

Review Comment:
   Probably `mem_scope` shouldn't be optional for this method and an exception 
can be generated in case if it is empty or `global`.



##########
src/runtime/opencl/opencl_common.h:
##########
@@ -311,16 +324,25 @@ class OpenCLWorkspace : public DeviceAPI {
   void* AllocDataSpace(Device dev, size_t size, size_t alignment, DLDataType 
type_hint) final;
   void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType 
dtype,
                        Optional<String> mem_scope = NullOpt) final;
+  void* AllocDataSpace(Device dev, size_t width, size_t height, DLDataType 
type_hint,
+                       Optional<String> mem_scope = NullOpt);
+
+  void* AllocDataSpaceView(Device dev, void* data, int ndim, const int64_t* 
shape, DLDataType dtype,
+                           Optional<String> mem_scope = NullOpt) final;
+
   void* GetNativePtr(const tvm::runtime::NDArray& narr);
   void FreeDataSpace(Device dev, void* ptr) final;
+  void FreeDataSpaceView(Device dev, void* ptr) final;
   void StreamSync(Device dev, TVMStreamHandle stream) final;
   void* AllocWorkspace(Device dev, size_t size, DLDataType type_hint) final;
   void FreeWorkspace(Device dev, void* data) final;
+  size_t GetDataSize(const DLTensor& arr, Optional<String> mem_scope = 
NullOpt) final;
 
-  // Texture (image2d_t) alloca APIs
-  cl_mem AllocTexture(Device dev, size_t width, size_t height, DLDataType 
type_hint);
-  void* AllocTextureWorkspace(Device dev, size_t width, size_t height, 
DLDataType type_hint);
-  void FreeTextureWorkspace(Device dev, void* data);
+  // cl_mem alloc utils
+  void* AllocCLBuffer(Device dev, size_t size, size_t alignment, DLDataType 
type_hint);
+  void* AllocCLImage(Device dev, void* back_buffer, size_t width, size_t 
height, size_t row_pitch,
+                     DLDataType type_hint, Optional<String> mem_scope);
+  void FreeCLBuffer(Device dev, void* ptr);

Review Comment:
   Probably this function can be better renamed to `FreeCLMem(Device dev, void* 
ptr)`. Because this method also might be used for `CLImage`. Furthermore, I 
didn't find implementation of this method in `opencl_device_api.cc`.



##########
src/relay/backend/token_allocator.h:
##########
@@ -105,54 +105,22 @@ class TokenAllocator1D {
    * \param tok The token to be released.
    */
   void CheckForRelease(StorageToken* tok);
-
- private:
-  // scale used for rough match
-  const size_t match_range_{16};
-  // free list of storage entry
-  std::multimap<size_t, StorageToken*> free_;
-  // all the storage resources available
-  std::vector<StorageToken*> data_;
-};
-
-/**
- * @brief Memory manager for 2d memory (textures)
- */
-class TokenAllocator2D {
- public:
-  /*!
-   * \brief Request a storage token for a given prototype.
-   * \param prototype. The prototype storage token.
-   * \return The result token.
-   */
-  StorageToken* Request(StorageToken* prototype);
-  /*!
-   * \brief Alloacte a storage token by consuming prototype
-   * \param prototype The prototype token.
-   * \param size The size of memory being requested.
-   */
-  StorageToken* Alloc(StorageToken* prototype, int64_t storage_id);
-  /*!
-   * \brief Check if we can release token.
-   * \param tok The token to be released.
-   */
-  void CheckForRelease(StorageToken* tok);
   /*!
    * \brief Get the texture 2d size requirement
    * \param prototype The prototype token.
-   * \return The required texture 2d memory size in (width, height, channel).
+   * \return the physical memory size.

Review Comment:
   ```suggestion
      * \return The physical memory size.
   ```



##########
src/runtime/opencl/opencl_device_api.cc:
##########
@@ -222,8 +227,61 @@ void* 
OpenCLWorkspace::CreateHostPtrIfEnabled(cl::BufferDescriptor* desc, Device
 void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t 
alignment,
                                       DLDataType type_hint) {
   this->Init();
+  return AllocCLBuffer(dev, size, alignment, type_hint);
+}
+
+void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t width, size_t height, 
DLDataType type_hint,
+                                      Optional<String> mem_scope) {
+  // Texture allocation given width and height
+  cl_uint row_align = GetImageAlignment(dev.device_id);
+  size_t pixel_size = (type_hint.bits * type_hint.lanes + 7) / 8;
+  size_t row_pitch = ALIGN_UP(width * pixel_size * 4, row_align);  // CL_RGBA 
= 4
+  size_t mem_size = row_pitch * height;
+
+  // Alloc back buffer from pool
+  cl::BufferDescriptor* back_buffer = nullptr;
+  if (IsBufferToImageSupported(dev.device_id)) {
+    auto buf = MemoryManager::GetOrCreateAllocator(dev, AllocatorType::kPooled)
+                   ->Alloc(mem_size, kTempAllocaAlignment, type_hint);
+    back_buffer = static_cast<cl::BufferDescriptor*>(buf.data);
+    back_buffer->mbuf = buf;
+  }
+
+  if (!mem_scope.defined()) {
+    mem_scope = Optional<String>("global.texture");
+  }
+  return AllocCLImage(dev, back_buffer, width, height, row_pitch, type_hint, 
mem_scope);
+}
+
+void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t* 
shape, DLDataType dtype,
+                                      Optional<String> mem_scope) {
+  this->Init();
+  if (!mem_scope.defined() || mem_scope.value() == "global") {

Review Comment:
   ```suggestion
     if (!mem_scope.defined() || mem_scope.value().empty() || mem_scope.value() 
== "global") {
   ```



##########
include/tvm/runtime/memory_manager.h:
##########
@@ -36,9 +36,8 @@
 
 namespace tvm {
 namespace runtime {
-namespace vm {
 
-struct Buffer {
+struct MBuffer {

Review Comment:
   Why did you rename `Buffer` in such way?



##########
src/relay/backend/token_allocator.cc:
##########
@@ -31,22 +31,39 @@
 
 namespace tvm {
 namespace relay {
+constexpr auto Is2DStorage = runtime::IsTextureStorage;
 
-size_t TokenAllocator1D::GetMemorySize(StorageToken* prototype) {
+/*
+ * Mixed mode memory allocator
+ */
+size_t TokenAllocatorMixed::GetMemorySize(StorageToken* prototype) {
   TensorType ttype = prototype->ttype;
   ICHECK(ttype.defined());
   size_t size = 1;
-  for (IndexExpr dim : ttype->shape) {
-    const int64_t* pval = tir::as_const_int(dim);
-    ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " 
<< ttype->shape;
-    ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative 
shape" << *pval;
-    size *= static_cast<size_t>(pval[0]);
+  if (relay::Is2DStorage(prototype->virtual_device->memory_scope)) {
+    size = GetSize2D(prototype);
+  } else {
+    for (IndexExpr dim : ttype->shape) {
+      const int64_t* pval = tir::as_const_int(dim);
+      ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape 
" << ttype->shape;
+      ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative 
shape" << *pval;
+      size *= static_cast<size_t>(pval[0]);
+    }
+    size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
   }
-  size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
   return size;
 }
 
-StorageToken* TokenAllocator1D::Request(StorageToken* prototype) {
+bool IsAdreno(StorageToken* tok) {

Review Comment:
   I just thought about using `adreno` key in `token_allocator.cc` which should 
be target agnostic... I have two ideas:
   1. Probably this method can be unified and renamed. E.g. `bool 
IsTargetContainsKey(const Target& tgt, String key)` and this method should be 
called in this way: `bool is_prototype_adreno = 
IsTargetContainsKey(prototype->virtual_device->target, "adreno");`. In this 
case, it can be reused in the future for another keys.
   2. Maybe you can introduce a new attribute for device like in 
`target_kind.cc` and use this attribute instead of extracting key `adreno` from 
virtual device? 
   
   To be honest, I mostly prefer the second option.



##########
src/runtime/opencl/opencl_device_api.cc:
##########
@@ -237,23 +295,131 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t 
size, size_t alignment,
   return CreateHostPtrIfEnabled(desc, dev, size);
 }
 
-void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t* 
shape, DLDataType dtype,
-                                      Optional<String> mem_scope) {
+void* OpenCLWorkspace::AllocCLImage(Device dev, void* back_buffer, size_t 
width, size_t height,
+                                    size_t row_pitch, DLDataType type_hint,
+                                    Optional<String> mem_scope) {
+  this->Init();
+  cl::BufferDescriptor* back_desc = 
static_cast<cl::BufferDescriptor*>(back_buffer);
+  cl_device_id device_id = GetCLDeviceID(dev.device_id);
+  auto platform = device_info[device_id].platform_id;
+  cl_int err_code;
+  cl_channel_type cl_type = DTypeToOpenCLChannelType(type_hint);
+  cl_image_format format = {CL_RGBA, cl_type};
+  cl_image_desc descriptor = {CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, 0, 
0, 0, 0};
+
+  if (IsBufferToImageSupported(dev.device_id)) {
+    descriptor.image_row_pitch = row_pitch;
+    descriptor.buffer = back_desc->buffer;
+  }
+  cl_mem mptr = clCreateImage(this->contexts[platform], CL_MEM_CREATE_FLAGS, 
&format, &descriptor,
+                              nullptr, &err_code);
+  OPENCL_CHECK_ERROR(err_code);
+
+  cl::BufferDescriptor* desc = new cl::BufferDescriptor(mem_scope);
+  desc->buffer = mptr;
+  desc->back_buffer = back_desc;
+
+  return desc;
+}
+
+size_t OpenCLWorkspace::GetDataSize(const DLTensor& arr, Optional<String> 
mem_scope) {
   if (!mem_scope.defined() || mem_scope.value() == "global") {
-    return DeviceAPI::AllocDataSpace(dev, ndim, shape, dtype, mem_scope);
+    return DeviceAPI::GetDataSize(arr);
   }
-  ICHECK(IsTextureStorage(std::string(mem_scope.value())))
-      << "Device does not support allocate data space with "
-      << "specified memory scope: " << mem_scope.value();
+  cl_uint row_align = GetImageAlignment(GetThreadEntry()->device.device_id);
+  std::vector<int64_t> shape;
+  shape.assign(arr.shape, arr.shape + arr.ndim);
+  return runtime::GetTextureMemorySize<std::vector<int64_t>>(shape, 
arr.dtype.bits, arr.dtype.lanes,
+                                                             
mem_scope.value(), row_align);
+}
 
-  ICHECK(ndim > 2) << "Shape for texture allocation must be at least rank 3; "
-                   << "provided shape is rank " << ndim;
+static size_t GetMemObjectSize(Device dev, int ndim, const int64_t* shape, 
DLDataType dtype) {
+  DLTensor temp;
+  temp.data = nullptr;
+  temp.device = dev;
+  temp.ndim = ndim;
+  temp.dtype = dtype;
+  temp.shape = const_cast<int64_t*>(shape);
+  temp.strides = nullptr;
+  temp.byte_offset = 0;
+  size_t size = GetDataSize(temp);
+  return size;
+}
 
-  cl::BufferDescriptor* desc = new cl::BufferDescriptor(mem_scope);
+void* OpenCLWorkspace::AllocDataSpaceView(Device dev, void* data, int ndim, 
const int64_t* shape,
+                                          DLDataType dtype, Optional<String> 
mem_scope) {
+  cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(data);
+
+  // Fall back for devices w/o "cl_khr_image2d_from_buffer"
+  if (!IsBufferToImageSupported(dev.device_id)) {
+    cl::BufferDescriptor* ret_desc = desc;  // buffer -> buffer
+    if (!mem_scope.defined() || mem_scope.value() == "global") {
+      if (desc->layout != cl::BufferDescriptor::MemoryLayout::kBuffer1D) {
+        // image -> buffer
+        size_t nbytes = GetMemObjectSize(dev, ndim, shape, dtype);
+        ret_desc = static_cast<cl::BufferDescriptor*>(
+            OpenCLWorkspace::AllocCLBuffer(dev, nbytes, kTempAllocaAlignment, 
dtype));
+        ret_desc->is_compat_view = true;
+      }
+    } else {
+      // Any -> Image
+      size_t axis = DefaultTextureLayoutSeparator(ndim, mem_scope.value());
+      auto texture = ApplyTexture2DFlattening<int64_t>(shape, ndim, axis);
+      cl_uint row_align = GetImageAlignment(dev.device_id);
+      size_t pixel_size = (dtype.bits * dtype.lanes + 7) / 8;
+      size_t row_pitch = ALIGN_UP(texture.width * pixel_size * 4, row_align);  
// CL_RGBA = 4
+
+      ret_desc = 
static_cast<cl::BufferDescriptor*>(OpenCLWorkspace::Global()->AllocCLImage(
+          dev, nullptr, texture.width, texture.height, row_pitch, dtype, 
mem_scope));
+      ret_desc->is_compat_view = true;
+    }
+    return ret_desc;
+  }
+
+  if (!mem_scope.defined() || mem_scope.value() == "global") {

Review Comment:
   ```suggestion
     if (!mem_scope.defined() || mem_scope.value().empty() || mem_scope.value() 
== "global") {
   ```



##########
tests/cpp-runtime/opencl/opencl_texture_pool_test.cc:
##########


Review Comment:
   The tests on the texture pool were removed, but which tests should replace 
them? Maybe you should add new tests on memory manager and check how allocators 
work? Such tests are available here: 
https://github.com/apache/tvm/blob/main/tests/cpp/runtime/vm/memory_manager_tests.cc



##########
src/runtime/opencl/opencl_device_api.cc:
##########
@@ -557,9 +745,14 @@ 
TVM_REGISTER_GLOBAL("device_api.opencl.alloc_nd").set_body([](TVMArgs args, TVMR
   type_hint.bits = static_cast<decltype(type_hint.bits)>(dtype_bits_hint);
   type_hint.lanes = 1;
 
-  OpenCLWorkspace* ptr = OpenCLWorkspace::Global();
-  *rv = ptr->AllocTextureWorkspace(dev, static_cast<size_t>(width), 
static_cast<size_t>(height),
-                                   type_hint);
+  /**rv = OpenCLWorkspace::Global()->GetThreadEntry()->mpool.AllocMemory(
+      dev, static_cast<size_t>(width), static_cast<size_t>(height), type_hint,
+      Optional<String>("global.texture"));
+  */

Review Comment:
   I think that this code can be removed



##########
src/runtime/opencl/opencl_device_api.cc:
##########
@@ -237,23 +295,131 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t 
size, size_t alignment,
   return CreateHostPtrIfEnabled(desc, dev, size);
 }
 
-void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t* 
shape, DLDataType dtype,
-                                      Optional<String> mem_scope) {
+void* OpenCLWorkspace::AllocCLImage(Device dev, void* back_buffer, size_t 
width, size_t height,
+                                    size_t row_pitch, DLDataType type_hint,
+                                    Optional<String> mem_scope) {
+  this->Init();
+  cl::BufferDescriptor* back_desc = 
static_cast<cl::BufferDescriptor*>(back_buffer);
+  cl_device_id device_id = GetCLDeviceID(dev.device_id);
+  auto platform = device_info[device_id].platform_id;
+  cl_int err_code;
+  cl_channel_type cl_type = DTypeToOpenCLChannelType(type_hint);
+  cl_image_format format = {CL_RGBA, cl_type};
+  cl_image_desc descriptor = {CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, 0, 
0, 0, 0};
+
+  if (IsBufferToImageSupported(dev.device_id)) {
+    descriptor.image_row_pitch = row_pitch;
+    descriptor.buffer = back_desc->buffer;
+  }
+  cl_mem mptr = clCreateImage(this->contexts[platform], CL_MEM_CREATE_FLAGS, 
&format, &descriptor,
+                              nullptr, &err_code);
+  OPENCL_CHECK_ERROR(err_code);
+
+  cl::BufferDescriptor* desc = new cl::BufferDescriptor(mem_scope);
+  desc->buffer = mptr;
+  desc->back_buffer = back_desc;
+
+  return desc;
+}
+
+size_t OpenCLWorkspace::GetDataSize(const DLTensor& arr, Optional<String> 
mem_scope) {
   if (!mem_scope.defined() || mem_scope.value() == "global") {

Review Comment:
   ```suggestion
     if (!mem_scope.defined() || mem_scope.value().empty() || mem_scope.value() 
== "global") {
   ```



##########
src/runtime/texture.h:
##########
@@ -94,74 +96,25 @@ inline bool IsTextureStorage(std::string scope) {
   return scope.find("texture") != std::string::npos;
 }
 
-class TVM_DLL Pool2D {
- public:
-  Pool2D() = default;
-  void* Alloc(Device dev, DeviceAPI* device, size_t width, size_t height, 
DLDataType type_hint);
-  void Free(void* data);
-  // Release all resources immediately
-  void Release(Device dev, DeviceAPI* device);
-
- protected:
-  struct Entry {
-    void* data;
-    size_t x;
-    size_t y;
-    DLDataType type;
-  };
-  std::vector<Entry> free_list_;
-  std::vector<Entry> allocated_;
-};
-
 /*!
- * \brief A two dimensional storage pool that recycles temporal workspace
- * allocations for dynamically allocated texture. See AllocTexture docstring
- * for approach to allocation and reuse.
+ * \param shape shape of tensor

Review Comment:
   Could you please add a brief description of this function?



##########
src/runtime/opencl/opencl_device_api.cc:
##########
@@ -222,8 +227,61 @@ void* 
OpenCLWorkspace::CreateHostPtrIfEnabled(cl::BufferDescriptor* desc, Device
 void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t 
alignment,
                                       DLDataType type_hint) {
   this->Init();
+  return AllocCLBuffer(dev, size, alignment, type_hint);
+}
+
+void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t width, size_t height, 
DLDataType type_hint,
+                                      Optional<String> mem_scope) {
+  // Texture allocation given width and height
+  cl_uint row_align = GetImageAlignment(dev.device_id);
+  size_t pixel_size = (type_hint.bits * type_hint.lanes + 7) / 8;
+  size_t row_pitch = ALIGN_UP(width * pixel_size * 4, row_align);  // CL_RGBA 
= 4
+  size_t mem_size = row_pitch * height;
+
+  // Alloc back buffer from pool
+  cl::BufferDescriptor* back_buffer = nullptr;
+  if (IsBufferToImageSupported(dev.device_id)) {
+    auto buf = MemoryManager::GetOrCreateAllocator(dev, AllocatorType::kPooled)

Review Comment:
   You use `AllocatorType::kPooled` here. What about `AllocatorType::kNaive`? 
Probably both of them should be supported and the user should be able to 
specify the allocator type when VM is created.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [tvm] echuraev commented on a diff in pull request #15058: [OpenCL][Texture] Improved texture memory planning

Reply via email to