--- CMakeLists.txt | 2 + kernels/image_2D_buffer.cl | 15 +++++ src/CMakeLists.txt | 5 ++ src/cl_api.c | 9 +++ src/cl_device_id.c | 4 ++ src/cl_device_id.h | 4 ++ src/cl_extensions.c | 2 +- src/cl_gt_device.h | 4 ++ src/cl_mem.c | 156 +++++++++++++++++++++++++++++++++++++++++++-- utests/CMakeLists.txt | 6 ++ utests/image_2D_buffer.cpp | 89 ++++++++++++++++++++++++++ 11 files changed, 290 insertions(+), 6 deletions(-) create mode 100644 kernels/image_2D_buffer.cl create mode 100644 utests/image_2D_buffer.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt index 49c8929..5ca7d90 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -189,6 +189,8 @@ ELSE(OCLIcd_FOUND) MESSAGE(STATUS "Looking for OCL ICD header file - not found") ENDIF(OCLIcd_FOUND) +SET (OCL_IMAGE2D_BUFFER "true") + Find_Package(PythonInterp) ADD_SUBDIRECTORY(include) diff --git a/kernels/image_2D_buffer.cl b/kernels/image_2D_buffer.cl new file mode 100644 index 0000000..6b9060c --- /dev/null +++ b/kernels/image_2D_buffer.cl @@ -0,0 +1,15 @@ +__kernel void image_2D_buffer(image2d_t image1, image2d_t image2, sampler_t sampler, __global int *results) +{ + int x = get_global_id(0); + int y = get_global_id(1); + int w = get_image_width(image1); + int offset = mad24(y, w, x); + + int4 pix = read_imagei(image1, (int2)(x, y)); + int4 test = (pix != read_imagei(image2, sampler, (int2)(x, y))); + + if (test.x || test.y || test.z || test.w) + results[offset] = 0; + else + results[offset] = 1; +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7182bad..3ca5f1f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -114,6 +114,11 @@ SET(CMAKE_CXX_FLAGS "-DHAS_USERPTR ${CMAKE_CXX_FLAGS}") SET(CMAKE_C_FLAGS "-DHAS_USERPTR ${CMAKE_C_FLAGS}") endif (DRM_INTEL_USERPTR) +if (OCL_IMAGE2D_BUFFER) +SET(CMAKE_CXX_FLAGS "-DHAS_OCLImage2dBuffer ${CMAKE_CXX_FLAGS}") +SET(CMAKE_C_FLAGS "-DHAS_OCLImage2dBuffer ${CMAKE_C_FLAGS}") +endif (OCL_IMAGE2D_BUFFER) + set(GIT_SHA1 "git_sha1.h") add_custom_target(${GIT_SHA1} ALL COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh diff --git a/src/cl_api.c b/src/cl_api.c index 972c687..04095a2 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -548,6 +548,14 @@ clCreateImage(cl_context context, err = CL_INVALID_IMAGE_DESCRIPTOR; goto error; } +#ifdef HAS_OCLImage2dBuffer + if ((image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER && + image_desc->image_type != CL_MEM_OBJECT_IMAGE2D) && + image_desc->buffer) { + err = CL_INVALID_IMAGE_DESCRIPTOR; + goto error; + } +#else /* buffer refers to a valid buffer memory object if image_type is CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. */ if (image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER && @@ -555,6 +563,7 @@ clCreateImage(cl_context context, err = CL_INVALID_IMAGE_DESCRIPTOR; goto error; } +#endif if (image_desc->num_mip_levels || image_desc->num_samples) { err = CL_INVALID_IMAGE_DESCRIPTOR; goto error; diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 5ef0bde..c47d48c 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -571,6 +571,10 @@ cl_get_device_info(cl_device_id device, DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain) DECL_FIELD(PARTITION_TYPE, partition_type) DECL_FIELD(REFERENCE_COUNT, device_reference_count) +#ifdef HAS_OCLImage2dBuffer + DECL_FIELD(IMAGE_PITCH_ALIGNMENT, image_pitch_alignment) + DECL_FIELD(IMAGE_BASE_ADDRESS_ALIGNMENT, image_base_address_alignment) +#endif case CL_DRIVER_VERSION: if (param_value_size_ret) { diff --git a/src/cl_device_id.h b/src/cl_device_id.h index ee6a8e6..8d8adac 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -113,6 +113,10 @@ struct _cl_device_id { cl_device_affinity_domain affinity_domain; cl_device_partition_property partition_type[3]; cl_uint device_reference_count; +#ifdef HAS_OCLImage2dBuffer + cl_uint image_pitch_alignment; + cl_uint image_base_address_alignment; +#endif }; /* Get a device from the given platform */ diff --git a/src/cl_extensions.c b/src/cl_extensions.c index d07a525..e31386f 100644 --- a/src/cl_extensions.c +++ b/src/cl_extensions.c @@ -34,7 +34,7 @@ void check_opt1_extension(cl_extensions_t *extensions) { int id; for(id = OPT1_EXT_START_ID; id <= OPT1_EXT_END_ID; id++) - if (id == EXT_ID(khr_icd)) + if (id == EXT_ID(khr_icd) || id == EXT_ID(khr_image2d_from_buffer)) extensions->extensions[id].base.ext_enabled = 1; } diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index 37abfd2..f9c5ad4 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -124,4 +124,8 @@ DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING) .affinity_domain = 0, .partition_type = {0}, .device_reference_count = 1, +#ifdef HAS_OCLImage2dBuffer +.image_pitch_alignment = 4, +.image_base_address_alignment = 4, +#endif diff --git a/src/cl_mem.c b/src/cl_mem.c index 3323897..ab8efa8 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -673,6 +673,7 @@ _cl_mem_new_image(cl_context ctx, size_t pitch, size_t slice_pitch, void *data, + int force_no_tiling, cl_int *errcode_ret) { cl_int err = CL_SUCCESS; @@ -736,7 +737,7 @@ _cl_mem_new_image(cl_context ctx, if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; /* Pick up tiling mode (we do only linear on SNB) */ - if (cl_driver_get_ver(ctx->drv) != 6) + if (!force_no_tiling && cl_driver_get_ver(ctx->drv) != 6) tiling = cl_get_default_tiling(ctx->drv); depth = 1; @@ -935,7 +936,7 @@ _cl_mem_new_image_from_buffer(cl_context ctx, // We have to create a new image, and copy the buffer data to this new image. // And replace all the buffer object's reference to this image. image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type, - mem_buffer->base.size / bpp, 0, 0, 0, 0, NULL, errcode_ret); + mem_buffer->base.size / bpp, 0, 0, 0, 0, NULL, 1, errcode_ret); if (image == NULL) return NULL; void *src = cl_mem_map(buffer, 0); @@ -953,7 +954,7 @@ _cl_mem_new_image_from_buffer(cl_context ctx, if (err != 0) goto error; - + // Now replace buffer's bo to this new bo, need to take care of sub buffer // case. cl_mem_replace_buffer(buffer, image->bo); @@ -974,6 +975,128 @@ error: return image; } +#ifdef HAS_OCLImage2dBuffer +static cl_mem +_cl_mem_new_image2d_from_buffer(cl_context ctx, + cl_mem_flags flags, + const cl_image_format* image_format, + const cl_image_desc *image_desc, + cl_int *errcode_ret) +{ + cl_mem image = NULL; + cl_mem buffer = image_desc->buffer; + cl_int err = CL_SUCCESS; + *errcode_ret = err; + cl_mem_flags merged_flags; + uint32_t bpp; + uint32_t intel_fmt = INTEL_UNSUPPORTED_FORMAT; + size_t offset = 0; + + /* Get the size of each pixel */ + if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS)) + goto error; + + /* Only a sub-set of the formats are supported */ + intel_fmt = cl_image_get_intel_format(image_format); + if (UNLIKELY(intel_fmt == INTEL_UNSUPPORTED_FORMAT)) { + err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + goto error; + } + + if (!buffer) { + err = CL_INVALID_IMAGE_DESCRIPTOR; + goto error; + } + + if (flags & (CL_MEM_USE_HOST_PTR|CL_MEM_ALLOC_HOST_PTR|CL_MEM_COPY_HOST_PTR)) { + err = CL_INVALID_IMAGE_DESCRIPTOR; + goto error; + } + + /* access check. */ + if ((buffer->flags & CL_MEM_WRITE_ONLY) && + (flags & (CL_MEM_READ_WRITE|CL_MEM_READ_ONLY))) { + err = CL_INVALID_VALUE; + goto error; + } + if ((buffer->flags & CL_MEM_READ_ONLY) && + (flags & (CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY))) { + err = CL_INVALID_VALUE; + goto error; + } + if ((buffer->flags & CL_MEM_HOST_WRITE_ONLY) && + (flags & CL_MEM_HOST_READ_ONLY)) { + err = CL_INVALID_VALUE; + goto error; + } + if ((buffer->flags & CL_MEM_HOST_READ_ONLY) && + (flags & CL_MEM_HOST_WRITE_ONLY)) { + err = CL_INVALID_VALUE; + goto error; + } + if ((buffer->flags & CL_MEM_HOST_NO_ACCESS) && + (flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY))) { + err = CL_INVALID_VALUE; + goto error; + } + + if (image_desc->image_width * image_desc->image_height * bpp > buffer->size) { + err = CL_INVALID_IMAGE_DESCRIPTOR; + goto error; + } + + merged_flags = buffer->flags; + if (flags & (CL_MEM_READ_WRITE|CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY)) { + merged_flags &= ~(CL_MEM_READ_WRITE|CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY); + merged_flags |= flags & (CL_MEM_READ_WRITE|CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY); + } + if (flags & (CL_MEM_HOST_WRITE_ONLY|CL_MEM_HOST_READ_ONLY|CL_MEM_HOST_NO_ACCESS)) { + merged_flags &= ~(CL_MEM_HOST_WRITE_ONLY|CL_MEM_HOST_READ_ONLY|CL_MEM_HOST_NO_ACCESS); + merged_flags |= flags & (CL_MEM_HOST_WRITE_ONLY|CL_MEM_HOST_READ_ONLY|CL_MEM_HOST_NO_ACCESS); + } + struct _cl_mem_buffer *mem_buffer = (struct _cl_mem_buffer*)buffer; + if (buffer->type == CL_MEM_SUBBUFFER_TYPE) { + offset = ((struct _cl_mem_buffer *)buffer)->sub_offset; + mem_buffer = mem_buffer->parent; + } + /* Get the size of each pixel */ + if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS)) + goto error; + + // Per bspec, a image should has a at least 2 line vertical alignment, + // thus we can't simply attach a buffer to a 1d image surface which has the same size. + // We have to create a new image, and copy the buffer data to this new image. + // And replace all the buffer object's reference to this image. + image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type, + image_desc->image_width, image_desc->image_height, 0, 0, 0, NULL, 1, errcode_ret); + if (image == NULL) + return NULL; + + if (err != 0) + goto error; + + // Now replace buffer's bo to this new bo, need to take care of sub buffer + // case. + cl_mem_replace_buffer(image, buffer->bo); + /* Now point to the right offset if buffer is a SUB_BUFFER. */ + if (buffer->flags & CL_MEM_USE_HOST_PTR) + image->host_ptr = buffer->host_ptr + offset; + cl_mem_image(image)->offset = offset; + cl_mem_image(image)->w = image_desc->image_width; + cl_mem_image(image)->h = image_desc->image_height; + cl_mem_add_ref(buffer); + cl_mem_image(image)->buffer_1d = buffer; + return image; + +error: + if (image) + cl_mem_delete(image); + image = NULL; + *errcode_ret = err; + return image; +} +#endif + LOCAL cl_mem cl_mem_new_image(cl_context context, cl_mem_flags flags, @@ -983,19 +1106,37 @@ cl_mem_new_image(cl_context context, cl_int *errcode_ret) { switch (image_desc->image_type) { +#ifdef HAS_OCLImage2dBuffer + case CL_MEM_OBJECT_IMAGE1D: + case CL_MEM_OBJECT_IMAGE3D: + return _cl_mem_new_image(context, flags, image_format, image_desc->image_type, + image_desc->image_width, image_desc->image_height, image_desc->image_depth, + image_desc->image_row_pitch, image_desc->image_slice_pitch, + host_ptr, 0, errcode_ret); + case CL_MEM_OBJECT_IMAGE2D: + if (image_desc->buffer) + return _cl_mem_new_image2d_from_buffer(context, flags, image_format, + image_desc, errcode_ret); + else + return _cl_mem_new_image(context, flags, image_format, image_desc->image_type, + image_desc->image_width, image_desc->image_height, image_desc->image_depth, + image_desc->image_row_pitch, image_desc->image_slice_pitch, + host_ptr, 0, errcode_ret); +#else case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE3D: return _cl_mem_new_image(context, flags, image_format, image_desc->image_type, image_desc->image_width, image_desc->image_height, image_desc->image_depth, image_desc->image_row_pitch, image_desc->image_slice_pitch, - host_ptr, errcode_ret); + host_ptr, 0, errcode_ret); +#endif case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE2D_ARRAY: return _cl_mem_new_image(context, flags, image_format, image_desc->image_type, image_desc->image_width, image_desc->image_height, image_desc->image_array_size, image_desc->image_row_pitch, image_desc->image_slice_pitch, - host_ptr, errcode_ret); + host_ptr, 0, errcode_ret); case CL_MEM_OBJECT_IMAGE1D_BUFFER: return _cl_mem_new_image_from_buffer(context, flags, image_format, image_desc, errcode_ret); @@ -1024,7 +1165,12 @@ cl_mem_delete(cl_mem mem) /* iff we are a image, delete the 1d buffer if has. */ if (IS_IMAGE(mem)) { if (cl_mem_image(mem)->buffer_1d) { +#ifdef HAS_OCLImage2dBuffer + assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER || + cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D); +#else assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER); +#endif cl_mem_delete(cl_mem_image(mem)->buffer_1d); cl_mem_image(mem)->buffer_1d = NULL; } diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 8cc8b43..3736652 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -242,6 +242,12 @@ else() SET(UTESTS_REQUIRED_EGL_LIB "") endif() +if (OCL_IMAGE2D_BUFFER) +SET(utests_sources ${utests_sources} image_2D_buffer.cpp) +SET(CMAKE_CXX_FLAGS "-DHAS_OCLImage2dBuffer ${CMAKE_CXX_FLAGS} ${DEF_OCL_PCH_PCM_PATH}") +SET(CMAKE_C_FLAGS "-DHAS_OCLImage2dBuffer ${CMAKE_C_FLAGS} ${DEF_OCL_PCH_PCM_PATH}") +endif () + if (COMPILER STREQUAL "CLANG") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-tautological-compare") endif () diff --git a/utests/image_2D_buffer.cpp b/utests/image_2D_buffer.cpp new file mode 100644 index 0000000..e6e88d6 --- /dev/null +++ b/utests/image_2D_buffer.cpp @@ -0,0 +1,89 @@ +#include <string.h> +#include "utest_helper.hpp" + +#define TEST_SIZE 1024 + +void image_2D_buffer(void) +{ + size_t pix_w = TEST_SIZE; + size_t pix_h = TEST_SIZE; + size_t buffer_sz = pix_w * pix_h * sizeof(uint32_t); + char *buf_content = (char *)malloc(buffer_sz * sizeof(char)); + int error; + cl_image_desc image_desc; + cl_image_format image_format; + cl_sampler sampler; + cl_mem image1, image2; + cl_mem ret_mem = NULL; + + OCL_CREATE_KERNEL("image_2D_buffer"); + + for (int32_t i = 0; i < (int32_t)buffer_sz; ++i) + buf_content[i] = (rand() & 127); + + cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + buffer_sz, buf_content, &error); + OCL_ASSERT(error == CL_SUCCESS); + + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + memset(&image_format, 0x0, sizeof(cl_image_format)); + + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; + image_desc.image_row_pitch = pix_w * sizeof(uint32_t); + image_desc.image_width = pix_w; + image_desc.image_height = pix_h; + image_desc.image_array_size = 1; + image_desc.buffer = buff; + + image_format.image_channel_order = CL_RGBA; + image_format.image_channel_data_type = CL_UNSIGNED_INT8; + + image1 = clCreateImage(ctx, CL_MEM_READ_ONLY, &image_format, + &image_desc, NULL, &error ); + OCL_ASSERT(error == CL_SUCCESS); + + error = clGetImageInfo(image1, CL_IMAGE_BUFFER, sizeof(ret_mem), &ret_mem, NULL); + OCL_ASSERT(error == CL_SUCCESS); + OCL_ASSERT(ret_mem == buff); + + + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; + image_desc.image_width = pix_w; + image_desc.image_height = pix_h; + image2 = clCreateImage(ctx, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, + &image_format, &image_desc, buf_content, &error); + OCL_ASSERT(error == CL_SUCCESS); + + // Create sampler to use + sampler = clCreateSampler(ctx, false, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); + OCL_ASSERT(error == CL_SUCCESS); + + cl_mem result_buf = buf[0] = clCreateBuffer(ctx, 0, buffer_sz, NULL, &error); + OCL_ASSERT(error == CL_SUCCESS); + + OCL_SET_ARG(0, sizeof(cl_mem), &image1); + OCL_SET_ARG(1, sizeof(cl_mem), &image2); + OCL_SET_ARG(2, sizeof(sampler), &sampler); + OCL_SET_ARG(3, sizeof(cl_mem), &result_buf); + + globals[0] = pix_w; + globals[1] = pix_h; + locals[0] = 16; + locals[1] = 16; + + OCL_NDRANGE(2); + + /* Now check the result. */ + OCL_MAP_BUFFER(0); + for (uint32_t i = 0; i < buffer_sz/sizeof(uint32_t); i++) + OCL_ASSERT(((uint32_t*)buf_data[0])[i] == 1); + OCL_UNMAP_BUFFER(0); + + clReleaseSampler(sampler); + clReleaseMemObject(image1); + clReleaseMemObject(image2); + clReleaseMemObject(buff); +} + +MAKE_UTEST_FROM_FUNCTION(image_2D_buffer); -- 1.9.3 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet