The current limitation is that both the address and the size of host_ptr must be page aligned. Remove this limitation by recording, inside the driver, the offset of host_ptr from the start of the page that contains it.
tests verified: beignet/utest, beignet/benchmark and conformance/basic, conformance/buffers, conformance/mem_host_flags Signed-off-by: Guo Yejun <[email protected]> --- benchmark/benchmark_use_host_ptr_buffer.cpp | 13 ++++++++++--- src/cl_command_queue.c | 4 ++-- src/cl_mem.c | 12 ++++++++---- src/cl_mem.h | 1 + utests/runtime_use_host_ptr_buffer.cpp | 15 +++++++++++---- 5 files changed, 32 insertions(+), 13 deletions(-) diff --git a/benchmark/benchmark_use_host_ptr_buffer.cpp b/benchmark/benchmark_use_host_ptr_buffer.cpp index 7ede576..0021290 100644 --- a/benchmark/benchmark_use_host_ptr_buffer.cpp +++ b/benchmark/benchmark_use_host_ptr_buffer.cpp @@ -5,13 +5,20 @@ int benchmark_use_host_ptr_buffer(void) { struct timeval start,stop; - const size_t n = 4096*4096; + const size_t n = 4096*4096 + 256; // Setup kernel and buffers OCL_CREATE_KERNEL("runtime_use_host_ptr_buffer"); - int ret = posix_memalign(&buf_data[0], 4096, sizeof(uint32_t) * n); - OCL_ASSERT(ret == 0); + buf_data[0] = malloc(sizeof(uint32_t) * n); + + //it does not matter if buf_data[0] is page aligned or not, + //here, just to test the case that it is not page aligned. 
+ while ((unsigned long)buf_data[0] % 4096 == 0) + { + free(buf_data[0]); + buf_data[0] = malloc(sizeof(uint32_t) * n); + } for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = i; OCL_CREATE_BUFFER(buf[0], CL_MEM_USE_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 12530d7..62fd810 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -170,9 +170,9 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i); if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) { struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem; - cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i)); + cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset + buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i)); } else { - cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, 0, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i)); + cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i)); } } diff --git a/src/cl_mem.c b/src/cl_mem.c index 3055bea..3b3421c 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -254,6 +254,7 @@ cl_mem_allocate(enum cl_mem_type type, mem->magic = CL_MAGIC_MEM_HEADER; mem->flags = flags; mem->is_userptr = 0; + mem->offset = 0; if (sz != 0) { /* Pinning will require stricter alignment rules */ @@ -273,10 +274,11 @@ cl_mem_allocate(enum cl_mem_type type, assert(host_ptr != NULL); /* userptr not support tiling */ if (!is_tiled) { - if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) { - mem->is_userptr = 1; - mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0); - } + void* aligned_host_ptr = (void*)(((unsigned 
long)host_ptr) & (~(page_size - 1))); + mem->offset = host_ptr - aligned_host_ptr; + mem->is_userptr = 1; + size_t aligned_sz = ALIGN((mem->offset + sz), page_size); + mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0); } } else if (flags & CL_MEM_ALLOC_HOST_PTR) { @@ -502,6 +504,8 @@ cl_mem_new_sub_buffer(cl_mem buffer, mem->ref_n = 1; mem->magic = CL_MAGIC_MEM_HEADER; mem->flags = flags; + mem->offset = buffer->offset; + mem->is_userptr = buffer->is_userptr; sub_buf->parent = (struct _cl_mem_buffer*)buffer; cl_mem_add_ref(buffer); diff --git a/src/cl_mem.h b/src/cl_mem.h index 1641dcc..ffe46a3 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -93,6 +93,7 @@ typedef struct _cl_mem { uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */ cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */ uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/ + size_t offset; /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/ } _cl_mem; struct _cl_mem_image { diff --git a/utests/runtime_use_host_ptr_buffer.cpp b/utests/runtime_use_host_ptr_buffer.cpp index 79273c3..4ae5379 100644 --- a/utests/runtime_use_host_ptr_buffer.cpp +++ b/utests/runtime_use_host_ptr_buffer.cpp @@ -2,13 +2,20 @@ static void runtime_use_host_ptr_buffer(void) { - const size_t n = 4096*100; + const size_t n = 4096*10 + 1111; // Setup kernel and buffers OCL_CREATE_KERNEL("runtime_use_host_ptr_buffer"); - int ret = posix_memalign(&buf_data[0], 4096, sizeof(uint32_t) * n); - OCL_ASSERT(ret == 0); + buf_data[0] = malloc(sizeof(uint32_t) * n); + + //it does not matter if buf_data[0] is page aligned or not, + //here, just to test the case that it is not page aligned. 
+ while ((unsigned long)buf_data[0] % 4096 == 0) + { + free(buf_data[0]); + buf_data[0] = malloc(sizeof(uint32_t) * n); + } for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = i; OCL_CREATE_BUFFER(buf[0], CL_MEM_USE_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); @@ -16,7 +23,7 @@ static void runtime_use_host_ptr_buffer(void) // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = n; - locals[0] = 256; + locals[0] = 1; OCL_NDRANGE(1); // Check result -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
