csullivan commented on a change in pull request #7711:
URL: https://github.com/apache/tvm/pull/7711#discussion_r641988881
##########
File path: src/runtime/opencl/opencl_device_api.cc
##########
@@ -138,41 +195,122 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t
size, size_t alignment,
this->Init();
ICHECK(context != nullptr) << "No OpenCL device";
cl_int err_code;
- cl_mem mptr = clCreateBuffer(this->context, CL_MEM_READ_WRITE, size,
nullptr, &err_code);
+ cl::BufferDescriptor* desc = new cl::BufferDescriptor;
+ desc->buffer = clCreateBuffer(this->context, CL_MEM_READ_WRITE, size,
nullptr, &err_code);
+ desc->layout = cl::BufferDescriptor::MemoryLayout::BUFFER_1D;
+ desc->shape.push_back(size);
+ desc->dtype = type_hint;
OPENCL_CHECK_ERROR(err_code);
- return mptr;
+ return desc;
+}
+
+void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t*
shape, DLDataType dtype,
+ Optional<String> mem_scope) {
+ if (!mem_scope.defined() || mem_scope.value() == "global") {
+ return DeviceAPI::AllocDataSpace(dev, ndim, shape, dtype, mem_scope);
+ }
+ ICHECK(IsTextureStorage(std::string(mem_scope.value())))
+ << "Device does not support allocate data space with "
+ << "specified memory scope: " << mem_scope.value();
+
+ ICHECK(ndim > 2) << "Shape for texture allocation must be at least rank 3; "
+ << "provided shape is rank " << ndim;
+
+ cl::BufferDescriptor* desc = new cl::BufferDescriptor(mem_scope);
+ size_t axis = DefaultTextureLayoutSeparator(ndim, mem_scope.value());
+ auto texture = ApplyTexture2DFlattening<int64_t>(shape, ndim, axis);
+ desc->buffer = AllocTexture(dev, texture.width, texture.height, dtype);
+ desc->shape.insert(desc->shape.end(), &shape[0], &shape[ndim]);
+ desc->dtype = dtype;
+ return desc;
}
void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) {
// We have to make sure that the memory object is not in the command queue
// for some OpenCL platforms.
OPENCL_CALL(clFinish(this->GetQueue(dev)));
- cl_mem mptr = static_cast<cl_mem>(ptr);
- OPENCL_CALL(clReleaseMemObject(mptr));
+ cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(ptr);
+ OPENCL_CALL(clReleaseMemObject(desc->buffer));
+ delete desc;
}
-void OpenCLWorkspace::CopyDataFromTo(const void* from, size_t from_offset,
void* to,
- size_t to_offset, size_t size, Device
dev_from, Device dev_to,
- DLDataType type_hint, TVMStreamHandle
stream) {
+cl_mem OpenCLWorkspace::AllocTexture(Device dev, size_t width, size_t height,
+ DLDataType type_hint) {
this->Init();
- ICHECK(stream == nullptr);
- if (IsOpenCLDevice(dev_from) && IsOpenCLDevice(dev_to)) {
- OPENCL_CALL(clEnqueueCopyBuffer(this->GetQueue(dev_to),
- static_cast<cl_mem>((void*)from), //
NOLINT(*)
- static_cast<cl_mem>(to), from_offset,
to_offset, size, 0,
- nullptr, nullptr));
- } else if (IsOpenCLDevice(dev_from) && dev_to.device_type == kDLCPU) {
- OPENCL_CALL(clEnqueueReadBuffer(this->GetQueue(dev_from),
- static_cast<cl_mem>((void*)from), //
NOLINT(*)
- CL_FALSE, from_offset, size,
static_cast<char*>(to) + to_offset,
- 0, nullptr, nullptr));
- OPENCL_CALL(clFinish(this->GetQueue(dev_from)));
- } else if (dev_from.device_type == kDLCPU && IsOpenCLDevice(dev_to)) {
- OPENCL_CALL(clEnqueueWriteBuffer(this->GetQueue(dev_to),
static_cast<cl_mem>(to), CL_FALSE,
- to_offset, size, static_cast<const
char*>(from) + from_offset,
- 0, nullptr, nullptr));
- OPENCL_CALL(clFinish(this->GetQueue(dev_to)));
+ ICHECK(context != nullptr) << "No OpenCL device";
+ cl_int err_code;
+ cl_channel_type cl_type = DTypeToOpenCLChannelType(type_hint);
+ cl_image_format format = {CL_RGBA, cl_type};
+ cl_image_desc descriptor = {CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, 0,
0, 0, 0};
+ cl_mem mptr =
+ clCreateImage(this->context, CL_MEM_READ_WRITE, &format, &descriptor,
nullptr, &err_code);
+ OPENCL_CHECK_ERROR(err_code);
+ return mptr;
+}
+
+void* OpenCLWorkspace::AllocTextureWorkspace(Device dev, size_t width, size_t
height,
+ DLDataType type_hint) {
+ return GetThreadEntry()->texture_pool.AllocTexture(dev, width, height,
type_hint);
+}
+
+void OpenCLWorkspace::FreeTextureWorkspace(Device dev, void* ptr) {
+ GetThreadEntry()->texture_pool.FreeTexture(dev, ptr);
+}
+
+void OpenCLWorkspace::CopyDataFromTo(DLTensor* from, DLTensor* to,
TVMStreamHandle stream) {
+ size_t nbytes = GetDataSize(*from);
+ ICHECK_EQ(nbytes, GetDataSize(*to));
+ ICHECK(IsContiguous(*from) && IsContiguous(*to))
+ << "CopyDataFromTo only support contiguous array for now";
+
+ if (IsOpenCLDevice(from->device) && IsOpenCLDevice(to->device)) {
+ const auto* from_desc = static_cast<const
cl::BufferDescriptor*>(from->data);
+ ICHECK(from_desc->layout == cl::BufferDescriptor::MemoryLayout::BUFFER_1D)
+ << "Device to device copying is currently only implemented for OpenCL
buffer storage";
+ auto* to_desc = static_cast<cl::BufferDescriptor*>(to->data);
+ OPENCL_CALL(clEnqueueCopyBuffer(this->GetQueue(to->device),
from_desc->buffer, to_desc->buffer,
+ from->byte_offset, to->byte_offset,
nbytes, 0, nullptr,
+ nullptr));
+ } else if (IsOpenCLDevice(from->device) && to->device.device_type == kDLCPU)
{
+ const auto* from_desc = static_cast<const
cl::BufferDescriptor*>(from->data);
+ switch (from_desc->layout) {
+ case cl::BufferDescriptor::MemoryLayout::BUFFER_1D:
+ OPENCL_CALL(clEnqueueReadBuffer(
+ this->GetQueue(from->device), from_desc->buffer, CL_FALSE,
from->byte_offset, nbytes,
+ static_cast<char*>(to->data) + to->byte_offset, 0, nullptr,
nullptr));
+ break;
+ case cl::BufferDescriptor::MemoryLayout::IMAGE_2D_ACTIVATION:
+ case cl::BufferDescriptor::MemoryLayout::IMAGE_2D_WEIGHT:
+ auto image_info = GetImageInfo(from_desc, from);
+ // TODO(csullivan): Support calculating row_pitch correctly in the
case of reuse.
Review comment:
Thank you for the good suggestion. I added a
[test](https://github.com/apache/tvm/pull/7711/files#diff-e9741363006d59a2ce0f9cdac61da8f0b55b79aed6e9cab9a0922113266df46eR65)
to demonstrate writing to a subview of a texture and to check that the data from
the larger allocation contains the
[expected](https://github.com/apache/tvm/pull/7711/files#diff-e9741363006d59a2ce0f9cdac61da8f0b55b79aed6e9cab9a0922113266df46eR117)
two-dimensional strides when copied back to the host.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]