The pointer must be 64-byte aligned, and w and h must equal their aligned values; otherwise, fall back to the old method with extra copying.
Signed-off-by: Guo Yejun <yejun....@intel.com> --- src/cl_command_queue.c | 7 ++----- src/cl_enqueue.c | 10 ++++++---- src/cl_mem.c | 45 ++++++++++++++++++++++++++++++++++++--------- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 4b92311..f506a87 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -135,19 +135,16 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) struct _cl_mem_image *image; assert(interp_kernel_get_arg_type(k->opaque, id) == GBE_ARG_IMAGE); - //currently, user ptr is not supported for cl image, so offset should be always zero - assert(k->args[id].mem->offset == 0); - image = cl_mem_image(k->args[id].mem); set_image_info(k->curbe, &k->images[i], image); - cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image->offset, + cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image->offset + k->args[id].mem->offset, image->intel_fmt, image->image_type, image->bpp, image->w, image->h, image->depth, image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling); // TODO, this workaround is for GEN7/GEN75 only, we may need to do it in the driver layer // on demand. 
if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) - cl_gpgpu_bind_image(gpgpu, k->images[i].idx + BTI_WORKAROUND_IMAGE_OFFSET, image->base.bo, image->offset, + cl_gpgpu_bind_image(gpgpu, k->images[i].idx + BTI_WORKAROUND_IMAGE_OFFSET, image->base.bo, image->offset + k->args[id].mem->offset, image->intel_fmt, image->image_type, image->bpp, image->w, image->h, image->depth, image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling); diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index 9e34bb8..cec368c 100644 --- a/src/cl_enqueue.c +++ b/src/cl_enqueue.c @@ -316,8 +316,9 @@ cl_int cl_enqueue_map_image(enqueue_data *data) if(mem->flags & CL_MEM_USE_HOST_PTR) { assert(mem->host_ptr); - //src and dst need add offset in function cl_mem_copy_image_region - cl_mem_copy_image_region(data->origin, data->region, + if (!mem->is_userptr) + //src and dst need add offset in function cl_mem_copy_image_region + cl_mem_copy_image_region(data->origin, data->region, mem->host_ptr, image->host_row_pitch, image->host_slice_pitch, data->ptr, row_pitch, image->slice_pitch, image, CL_TRUE, CL_TRUE); } @@ -374,8 +375,9 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data) row_pitch = image->slice_pitch; else row_pitch = image->row_pitch; - //v_ptr have added offset, host_ptr have not added offset. - cl_mem_copy_image_region(origin, region, v_ptr, row_pitch, image->slice_pitch, + if (!memobj->is_userptr) + //v_ptr have added offset, host_ptr have not added offset. 
+ cl_mem_copy_image_region(origin, region, v_ptr, row_pitch, image->slice_pitch, memobj->host_ptr, image->host_row_pitch, image->host_slice_pitch, image, CL_FALSE, CL_TRUE); } diff --git a/src/cl_mem.c b/src/cl_mem.c index 80c9064..c512355 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -288,7 +288,6 @@ cl_mem_allocate(enum cl_mem_type type, int cacheline_size = 0; cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL); - /* currently only cl buf is supported, will add cl image support later */ if (type == CL_MEM_BUFFER_TYPE) { if (flags & CL_MEM_USE_HOST_PTR) { assert(host_ptr != NULL); @@ -312,6 +311,18 @@ cl_mem_allocate(enum cl_mem_type type, mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", internal_host_ptr, alignedSZ, 0); bufCreated = 1; } + } else if (type == CL_MEM_IMAGE_TYPE) { + if (host_ptr != NULL) { + assert(flags & CL_MEM_USE_HOST_PTR); + assert(!is_tiled); + assert(ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned long)host_ptr); + void* aligned_host_ptr = (void*)(((unsigned long)host_ptr) & (~(page_size - 1))); + mem->offset = host_ptr - aligned_host_ptr; + mem->is_userptr = 1; + size_t aligned_sz = ALIGN((mem->offset + sz), page_size); + mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0); + bufCreated = 1; + } } } @@ -823,6 +834,16 @@ _cl_mem_new_image(cl_context ctx, #undef DO_IMAGE_ERROR + uint8_t enableUserptr = 0; + if (ctx->device->host_unified_memory && data != NULL && (flags & CL_MEM_USE_HOST_PTR)) { + int cacheline_size = 0; + cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL); + if (ALIGN((unsigned long)data, cacheline_size) == (unsigned long)data) { //might more conditions here + tiling = CL_NO_TILE; + enableUserptr = 1; + } + } + /* Tiling requires to align both pitch and height */ if (tiling == CL_NO_TILE) { aligned_pitch = w * 
bpp; @@ -861,8 +882,12 @@ _cl_mem_new_image(cl_context ctx, if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER) { if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, buffer, &err); - else - mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, data, NULL, &err); + else { + if (enableUserptr) + mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, data, NULL, &err); + else + mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, NULL, &err); + } } else { mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, NULL, &err); if (mem != NULL && err == CL_SUCCESS) { @@ -892,13 +917,15 @@ _cl_mem_new_image(cl_context ctx, 0, 0, 0); /* Copy the data if required */ - if (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { + if (flags & CL_MEM_COPY_HOST_PTR) cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data); - if (flags & CL_MEM_USE_HOST_PTR) { - mem->host_ptr = data; - cl_mem_image(mem)->host_row_pitch = pitch; - cl_mem_image(mem)->host_slice_pitch = slice_pitch; - } + + if (flags & CL_MEM_USE_HOST_PTR) { + mem->host_ptr = data; + cl_mem_image(mem)->host_row_pitch = pitch; + cl_mem_image(mem)->host_slice_pitch = slice_pitch; + if (!enableUserptr) + cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data); } exit: -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet