echuraev commented on code in PR #15058:
URL: https://github.com/apache/tvm/pull/15058#discussion_r1349867420
##########
apps/android_deploy/app/src/main/jni/tvm_runtime.h:
##########
@@ -49,6 +49,5 @@
#include "../src/runtime/opencl/opencl_device_api.cc"
#include "../src/runtime/opencl/opencl_module.cc"
#include "../src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc"
Review Comment:
Probably we should add `opencl/memory_pool.cc` to the include list? The same
question for `android_rpc`.
##########
src/runtime/memory/memory_manager.cc:
##########
@@ -128,6 +128,7 @@ Allocator* MemoryManager::GetOrCreateAllocator(Device dev,
AllocatorType type) {
// Look for any available, else create Naive.
if (type == AllocatorType::kAny) {
+ it = m->allocators_.find(dev);
Review Comment:
Do you really need this line? I see on the line#124 the same iterator was
found.
##########
src/runtime/opencl/opencl_device_api.cc:
##########
@@ -269,36 +430,32 @@ void OpenCLWorkspace::FreeDataSpace(Device dev, void*
ptr) {
OPENCL_CALL(clFinish(this->GetQueue(dev)));
cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(ptr);
- if (desc->host_ptr) {
- clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer,
- reinterpret_cast<void*>(desc->host_ptr), 0,
nullptr, nullptr);
- }
- OPENCL_CALL(clReleaseMemObject(desc->buffer));
- delete desc;
-}
-cl_mem OpenCLWorkspace::AllocTexture(Device dev, size_t width, size_t height,
- DLDataType type_hint) {
- this->Init();
- cl_device_id device_id = GetCLDeviceID(dev.device_id);
- auto platform = device_to_platform[device_id];
- cl_int err_code;
- cl_channel_type cl_type = DTypeToOpenCLChannelType(type_hint);
- cl_image_format format = {CL_RGBA, cl_type};
- cl_image_desc descriptor = {CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, 0,
0, 0, 0};
- cl_mem mptr = clCreateImage(this->contexts[platform], CL_MEM_CREATE_FLAGS,
&format, &descriptor,
- nullptr, &err_code);
- OPENCL_CHECK_ERROR(err_code);
- return mptr;
-}
-
-void* OpenCLWorkspace::AllocTextureWorkspace(Device dev, size_t width, size_t
height,
- DLDataType type_hint) {
- return GetThreadEntry()->texture_pool.AllocTexture(dev, width, height,
type_hint);
-}
-
-void OpenCLWorkspace::FreeTextureWorkspace(Device dev, void* ptr) {
- GetThreadEntry()->texture_pool.FreeTexture(dev, ptr);
+ if (desc->back_buffer) {
+ // 2D Image w/ back buffer allocated from pool
+ OPENCL_CALL(clReleaseMemObject(desc->buffer));
+ // GetThreadEntry()->mpool.FreeMemory(dev, desc->back_buffer);
+ MemoryManager::GetAllocator(dev, desc->back_buffer->mbuf.alloc_type)
+ ->Free(desc->back_buffer->mbuf);
+ delete desc;
+ } else {
+ if (desc->layout == cl::BufferDescriptor::MemoryLayout::kBuffer1D) {
+ // 1D buffer allocated from pool
+ // GetThreadEntry()->mpool.FreeMemory(dev, desc);
Review Comment:
Do we need this comment?
##########
src/runtime/opencl/opencl_device_api.cc:
##########
@@ -224,8 +241,57 @@ void*
OpenCLWorkspace::CreateHostPtrIfEnabled(cl::BufferDescriptor* desc, Device
void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t
alignment,
DLDataType type_hint) {
this->Init();
+ return AllocCLBuffer(dev, size, alignment, type_hint);
+}
+
+void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t width, size_t height,
DLDataType type_hint,
+ Optional<String> mem_scope) {
+ // Texture allocation given width and height
+ cl_uint row_align = GetImageAlignment(dev.device_id);
+ size_t pixel_size = (type_hint.bits * type_hint.lanes + 7) / 8;
+ size_t row_pitch = ALIGN_UP(width * pixel_size * 4, row_align); // CL_RGBA
= 4
+ size_t mem_size = row_pitch * height;
+
+ // Alloc back buffer from pool
+ cl::BufferDescriptor* back_buffer = nullptr;
+ if (IsBufferToImageSupported(dev.device_id)) {
+ auto buf = MemoryManager::GetOrCreateAllocator(dev, AllocatorType::kAny)
+ ->Alloc(mem_size, kTempAllocaAlignment, type_hint);
+ back_buffer = static_cast<cl::BufferDescriptor*>(buf.data);
+ back_buffer->mbuf = buf;
+ }
+
+ if (!mem_scope.defined()) {
Review Comment:
Should we also add check on `mem_scope.empty() || mem_scope == "global"`? Or
`ICHECK` can be added to validate that `mem_scope` has a valid value.
##########
src/runtime/opencl/opencl_device_api.cc:
##########
@@ -269,36 +430,32 @@ void OpenCLWorkspace::FreeDataSpace(Device dev, void*
ptr) {
OPENCL_CALL(clFinish(this->GetQueue(dev)));
cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(ptr);
- if (desc->host_ptr) {
- clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer,
- reinterpret_cast<void*>(desc->host_ptr), 0,
nullptr, nullptr);
- }
- OPENCL_CALL(clReleaseMemObject(desc->buffer));
- delete desc;
-}
-cl_mem OpenCLWorkspace::AllocTexture(Device dev, size_t width, size_t height,
- DLDataType type_hint) {
- this->Init();
- cl_device_id device_id = GetCLDeviceID(dev.device_id);
- auto platform = device_to_platform[device_id];
- cl_int err_code;
- cl_channel_type cl_type = DTypeToOpenCLChannelType(type_hint);
- cl_image_format format = {CL_RGBA, cl_type};
- cl_image_desc descriptor = {CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, 0,
0, 0, 0};
- cl_mem mptr = clCreateImage(this->contexts[platform], CL_MEM_CREATE_FLAGS,
&format, &descriptor,
- nullptr, &err_code);
- OPENCL_CHECK_ERROR(err_code);
- return mptr;
-}
-
-void* OpenCLWorkspace::AllocTextureWorkspace(Device dev, size_t width, size_t
height,
- DLDataType type_hint) {
- return GetThreadEntry()->texture_pool.AllocTexture(dev, width, height,
type_hint);
-}
-
-void OpenCLWorkspace::FreeTextureWorkspace(Device dev, void* ptr) {
- GetThreadEntry()->texture_pool.FreeTexture(dev, ptr);
+ if (desc->back_buffer) {
+ // 2D Image w/ back buffer allocated from pool
+ OPENCL_CALL(clReleaseMemObject(desc->buffer));
+ // GetThreadEntry()->mpool.FreeMemory(dev, desc->back_buffer);
Review Comment:
Why this comment was added?
##########
src/runtime/opencl/opencl_device_api.cc:
##########
@@ -239,25 +305,120 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t
size, size_t alignment,
return CreateHostPtrIfEnabled(desc, dev, size);
}
-void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t*
shape, DLDataType dtype,
- Optional<String> mem_scope) {
- if (!mem_scope.defined() || mem_scope.value().empty() || mem_scope.value()
== "global") {
- return DeviceAPI::AllocDataSpace(dev, ndim, shape, dtype, mem_scope);
- }
- ICHECK(IsTextureStorage(std::string(mem_scope.value())))
- << "Device does not support allocate data space with "
- << "specified memory scope: " << mem_scope.value();
+void* OpenCLWorkspace::AllocCLImage(Device dev, void* back_buffer, size_t
width, size_t height,
+ size_t row_pitch, DLDataType type_hint,
+ Optional<String> mem_scope) {
+ this->Init();
+ cl::BufferDescriptor* back_desc =
static_cast<cl::BufferDescriptor*>(back_buffer);
+ cl_device_id device_id = GetCLDeviceID(dev.device_id);
+ auto platform = device_info[device_id].platform_id;
+ cl_int err_code;
+ cl_channel_type cl_type = DTypeToOpenCLChannelType(type_hint);
+ cl_image_format format = {CL_RGBA, cl_type};
+ cl_image_desc descriptor = {CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, 0,
0, 0, 0};
- ICHECK(ndim > 2) << "Shape for texture allocation must be at least rank 3; "
- << "provided shape is rank " << ndim;
+ if (IsBufferToImageSupported(dev.device_id)) {
+ descriptor.image_row_pitch = row_pitch;
+ descriptor.buffer = back_desc->buffer;
+ }
+ cl_mem mptr = clCreateImage(this->contexts[platform], CL_MEM_CREATE_FLAGS,
&format, &descriptor,
+ nullptr, &err_code);
+ OPENCL_CHECK_ERROR(err_code);
cl::BufferDescriptor* desc = new cl::BufferDescriptor(mem_scope);
- size_t axis = DefaultTextureLayoutSeparator(ndim, mem_scope.value());
- auto texture = ApplyTexture2DFlattening<int64_t>(shape, ndim, axis);
- desc->buffer = AllocTexture(dev, texture.width, texture.height, dtype);
+ desc->buffer = mptr;
+ desc->back_buffer = back_desc;
+
return desc;
}
+size_t OpenCLWorkspace::GetDataSize(const DLTensor& arr, Optional<String>
mem_scope) {
+ if (!mem_scope.defined() || mem_scope.value().empty() || mem_scope.value()
== "global") {
+ return DeviceAPI::GetDataSize(arr);
+ }
+ cl_uint row_align = GetImageAlignment(GetThreadEntry()->device.device_id);
+ std::vector<int64_t> shape;
+ shape.assign(arr.shape, arr.shape + arr.ndim);
+ return runtime::GetTextureMemorySize<std::vector<int64_t>>(shape,
arr.dtype.bits, arr.dtype.lanes,
+
mem_scope.value(), row_align);
+}
+
+void* OpenCLWorkspace::AllocDataSpaceView(Device dev, void* data, ShapeTuple
shape,
+ DLDataType dtype, Optional<String>
mem_scope) {
+ cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(data);
+
+ // Fall back for devices w/o "cl_khr_image2d_from_buffer"
+ if (!IsBufferToImageSupported(dev.device_id)) {
+ cl::BufferDescriptor* ret_desc = desc; // buffer -> buffer
+ if (!mem_scope.defined() || mem_scope.value() == "global") {
Review Comment:
```suggestion
if (!mem_scope.defined() || mem_scope.value().empty() ||
mem_scope.value() == "global") {
```
##########
src/runtime/opencl/opencl_device_api.cc:
##########
@@ -529,9 +693,29 @@ void OpenCLWorkspace::Init(const std::string& type_key,
const std::string& devic
this->devices.insert(this->devices.end(), devices.begin(), devices.end());
for (size_t i = 0; i < devices.size(); ++i) {
cl_device_id did = devices[i];
- device_to_platform[did] = platform;
+ CLDeviceInfo dev_info;
+ dev_info.platform_id = platform;
this->queues.push_back(clCreateCommandQueue(this->contexts[platform],
did, 0, &err_code));
OPENCL_CHECK_ERROR(err_code);
+ cl_uint row_pitch;
+ OPENCL_CALL(clGetDeviceInfo(did, CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR,
sizeof(row_pitch),
+ &row_pitch, nullptr));
+ if (0 == row_pitch) {
+ row_pitch = kAllocAlignment; // Fallback
+ }
+ dev_info.image_row_align = row_pitch;
+ size_t reqd_size = 0;
+ OPENCL_CALL(clGetDeviceInfo(did, CL_DEVICE_EXTENSIONS, 0, nullptr,
&reqd_size));
+ std::vector<char> extn_buf(reqd_size);
+ OPENCL_CALL(clGetDeviceInfo(did, CL_DEVICE_EXTENSIONS, reqd_size,
extn_buf.data(), nullptr));
+ std::string extensions(extn_buf.data());
+
+ if (extensions.find("cl_khr_image2d_from_buffer") != std::string::npos) {
+ dev_info.image_from_buffer_support = true;
+ } else {
+ dev_info.image_from_buffer_support = false;
+ }
Review Comment:
Just an idea of improvement. I suppose that this piece of code can be moved
in a separate function, e.g. `bool IsOpenCLExtensionSupported(const
std::string& name)` and this function can be used in different places with
different extensions in the future.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]