From: Yan Wang <yan.w...@linux.intel.com> It may fail to copy data from the host pointer to a large TILE_Y image, so use clCopyBufferToImage to do the copy on the GPU side.
Signed-off-by: Yan Wang <yan.w...@linux.intel.com> --- src/cl_mem.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- src/cl_mem.h | 2 ++ 2 files changed, 97 insertions(+), 5 deletions(-) diff --git a/src/cl_mem.c b/src/cl_mem.c index 4a7bec8..fe0dd2f 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -149,6 +149,8 @@ cl_mem_allocate(enum cl_mem_type type, mem->is_userptr = 0; mem->offset = 0; mem->is_svm = 0; + mem->is_ker_copy = 0; + mem->tmp_ker_buf = NULL; mem->cmrt_mem = NULL; if (mem->type == CL_MEM_IMAGE_TYPE) { cl_mem_image(mem)->is_image_from_buffer = 0; @@ -750,6 +752,77 @@ cl_image_tiling_t cl_get_default_tiling(cl_driver drv) return tiling; } +cl_command_queue image_queue = NULL; + +static cl_mem +_cl_new_image_copy_from_host_ptr(cl_context ctx, + cl_mem_flags flags, + const cl_image_format *fmt, + const cl_mem_object_type image_type, + size_t w, + size_t h, + size_t depth, + size_t pitch, + size_t slice_pitch, + size_t sz, + size_t aligned_pitch, + uint32_t intel_fmt, + uint32_t bpp, + cl_image_tiling_t tiling, + void *data, //pointer from application + cl_int *errcode_ret) +{ + cl_int err = CL_SUCCESS; + cl_mem mem = NULL; + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {w, h, depth}; + size_t aligned_slice_pitch = 0; + + if (image_queue == NULL) { + image_queue = clCreateCommandQueueWithProperties(ctx, ctx->devices[0], 0, &err); + if (err != CL_SUCCESS) { + *errcode_ret = err; + return NULL; + } + } + + // Map host ptr to OCL buffer + cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, sz, data, &err); + if (err != CL_SUCCESS) { + *errcode_ret = err; + return NULL; + } + + mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, NULL, &err); + if (mem == NULL || err != CL_SUCCESS) { + clReleaseMemObject(buf); + return NULL; + } + + cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch); + + if (image_type == CL_MEM_OBJECT_IMAGE2D) + aligned_slice_pitch = 0; + else + //SKL need use tiling's aligned_h to 
calc slice_pitch and IVB to BDW need CL_NO_TILE's aligned_h to calc. + aligned_slice_pitch = aligned_pitch * ALIGN(h, cl_buffer_get_tiling_align(ctx, tiling, 2)); + + cl_mem_image_init(cl_mem_image(mem), w, h, image_type, depth, *fmt, + intel_fmt, bpp, aligned_pitch, aligned_slice_pitch, tiling, + 0, 0, 0); + + err = clEnqueueCopyBufferToImage(image_queue, buf, mem, 0, origin, region, 0, NULL, NULL); + if(err != CL_SUCCESS) { + clReleaseMemObject(buf); + clReleaseMemObject(mem); + return NULL; + } + + mem->is_ker_copy = 1; + clReleaseMemObject(buf); + return mem; +} + static cl_mem _cl_mem_new_image(cl_context ctx, cl_mem_flags flags, @@ -765,6 +838,7 @@ _cl_mem_new_image(cl_context ctx, cl_int *errcode_ret) { cl_int err = CL_SUCCESS; + cl_bool is_ker_copy = 0; cl_mem mem = NULL; cl_mem_object_type image_type = orig_image_type; uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT; @@ -931,11 +1005,22 @@ _cl_mem_new_image(cl_context ctx, /* If sz is large than 128MB, map gtt may fail in some system. Because there is no obviours performance drop, disable tiling. 
*/ - if(tiling != CL_NO_TILE && sz > MAX_TILING_SIZE) { - tiling = CL_NO_TILE; - aligned_pitch = w * bpp; - aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1)); - sz = aligned_pitch * aligned_h * depth; + if (tiling != CL_NO_TILE && sz > MAX_TILING_SIZE) { + if ((image_type == CL_MEM_OBJECT_IMAGE2D || image_type == CL_MEM_OBJECT_IMAGE3D) && + (flags & CL_MEM_COPY_HOST_PTR)) { + mem = _cl_new_image_copy_from_host_ptr(ctx, flags, fmt, image_type, w, h, depth, pitch, + slice_pitch, sz, aligned_pitch, intel_fmt, bpp, tiling, data, &err); + if (mem != NULL) + goto exit; + } + + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) { + tiling = CL_NO_TILE; + aligned_pitch = w * bpp; + aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1)); + sz = aligned_pitch * aligned_h * depth; + } else + is_ker_copy = 1; } if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER) { @@ -992,6 +1077,8 @@ _cl_mem_new_image(cl_context ctx, cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data); } + mem->is_ker_copy = is_ker_copy; + exit: if (errcode_ret) *errcode_ret = err; @@ -1368,6 +1455,9 @@ cl_mem_delete(cl_mem mem) cl_free(cb); } + if (mem->tmp_ker_buf) + cl_mem_delete(mem->tmp_ker_buf); + /* iff we are a image, delete the 1d buffer if has. */ if (IS_IMAGE(mem)) { if (cl_mem_image(mem)->buffer_1d) { diff --git a/src/cl_mem.h b/src/cl_mem.h index edfd043..20afe23 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -98,6 +98,8 @@ typedef struct _cl_mem { list_head dstr_cb_head; /* All destroy callbacks. */ uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled */ cl_bool is_svm; /* This object is svm */ + cl_bool is_ker_copy; /* this object is copied by OCL kernel */ + cl_mem tmp_ker_buf; /* this object is tmp buffer for OCL kernel copying */ size_t offset; /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/ uint8_t cmrt_mem_type; /* CmBuffer, CmSurface2D, ... 
*/ -- 2.7.4 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet