LGTM, pushed, thanks.
On Fri, Aug 23, 2013 at 11:04:21AM +0800, Yang Rong wrote: > There is an unsync map function, drm_intel_gem_bo_map_unsynchronized, in drm > that can > be used to do a non-blocking map. But this function only maps gtt, so force the > use of map > gtt for all clEnqueueMapBuffer and clEnqueueMapImage. > > V2: refined comment, and using map_gtt_unsync in clEnqueueMapBuffer/Image > instead of map_auto to avoid confusion. > > Signed-off-by: Yang Rong <[email protected]> > --- > src/cl_api.c | 81 > ++++++++++++++++++++++++++++++++++++++++++++++-- > src/cl_driver.h | 4 +++ > src/cl_driver_defs.c | 1 + > src/cl_enqueue.c | 76 ++++++--------------------------------------- > src/cl_mem.c | 8 +++++ > src/cl_mem.h | 3 ++ > src/intel/intel_driver.c | 1 + > 7 files changed, 105 insertions(+), 69 deletions(-) > > diff --git a/src/cl_api.c b/src/cl_api.c > index 4f048ee..67446e2 100644 > --- a/src/cl_api.c > +++ b/src/cl_api.c > @@ -1576,6 +1576,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue, > cl_int * errcode_ret) > { > cl_int err = CL_SUCCESS; > + void *ptr = NULL; > + void *mem_ptr = NULL; > + cl_int slot = -1; > enqueue_data *data, no_wait_data = { 0 }; > > CHECK_QUEUE(command_queue); > @@ -1602,6 +1605,69 @@ clEnqueueMapBuffer(cl_command_queue command_queue, > goto error; > } > > + if (!(ptr = cl_mem_map_gtt_unsync(buffer))) { > + err = CL_MAP_FAILURE; > + goto error; > + } > + > + ptr = (char*)ptr + offset; > + > + if(buffer->flags & CL_MEM_USE_HOST_PTR) { > + assert(buffer->host_ptr); > + //only calc ptr here, will do memcpy in enqueue > + mem_ptr = buffer->host_ptr + offset; > + } else { > + mem_ptr = ptr; > + } > + > + /* Record the mapped address. 
*/ > + if (!buffer->mapped_ptr_sz) { > + buffer->mapped_ptr_sz = 16; > + buffer->mapped_ptr = (cl_mapped_ptr *)malloc( > + sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz); > + if (!buffer->mapped_ptr) { > + cl_mem_unmap_gtt (buffer); > + err = CL_OUT_OF_HOST_MEMORY; > + ptr = NULL; > + goto error; > + } > + > + memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * > sizeof(cl_mapped_ptr)); > + slot = 0; > + } else { > + int i = 0; > + for (; i < buffer->mapped_ptr_sz; i++) { > + if (buffer->mapped_ptr[i].ptr == NULL) { > + slot = i; > + break; > + } > + } > + > + if (i == buffer->mapped_ptr_sz) { > + cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc( > + sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2); > + if (!new_ptr) { > + cl_mem_unmap_gtt (buffer); > + err = CL_OUT_OF_HOST_MEMORY; > + ptr = NULL; > + goto error; > + } > + memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); > + memcpy(new_ptr, buffer->mapped_ptr, > + buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); > + slot = buffer->mapped_ptr_sz; > + buffer->mapped_ptr_sz *= 2; > + free(buffer->mapped_ptr); > + buffer->mapped_ptr = new_ptr; > + } > + } > + > + assert(slot != -1); > + buffer->mapped_ptr[slot].ptr = mem_ptr; > + buffer->mapped_ptr[slot].v_ptr = ptr; > + buffer->mapped_ptr[slot].size = size; > + buffer->map_ref++; > + > TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, > event, buffer->ctx); > > data = &no_wait_data; > @@ -1610,6 +1676,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue, > data->offset = offset; > data->size = size; > data->map_flags = map_flags; > + data->ptr = ptr; > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_READ_BUFFER) == > CL_ENQUEUE_EXECUTE_IMM) { > @@ -1620,7 +1687,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue, > error: > if (errcode_ret) > *errcode_ret = err; > - return data->ptr; > + return mem_ptr; > } > > void * > @@ -1638,6 +1705,7 @@ 
clEnqueueMapImage(cl_command_queue command_queue, > cl_int * errcode_ret) > { > cl_int err = CL_SUCCESS; > + void *ptr = NULL; > enqueue_data *data, no_wait_data = { 0 }; > > CHECK_QUEUE(command_queue); > @@ -1673,6 +1741,14 @@ clEnqueueMapImage(cl_command_queue command_queue, > goto error; > } > > + if (!(ptr = cl_mem_map_gtt_unsync(image))) { > + err = CL_MAP_FAILURE; > + goto error; > + } > + > + size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + > image->slice_pitch*origin[2]; > + ptr = (char*)ptr + offset; > + > TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, > event, image->ctx); > > data = &no_wait_data; > @@ -1683,6 +1759,7 @@ clEnqueueMapImage(cl_command_queue command_queue, > data->row_pitch = *image_row_pitch; > data->slice_pitch = *image_slice_pitch; > data->map_flags = map_flags; > + data->ptr = ptr; > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_READ_BUFFER) == > CL_ENQUEUE_EXECUTE_IMM) { > @@ -1693,7 +1770,7 @@ clEnqueueMapImage(cl_command_queue command_queue, > error: > if (errcode_ret) > *errcode_ret = err; > - return data->ptr; //TODO: map and unmap first > + return ptr; //TODO: map and unmap first > } > > cl_int > diff --git a/src/cl_driver.h b/src/cl_driver.h > index 1a0ec38..0ce03fe 100644 > --- a/src/cl_driver.h > +++ b/src/cl_driver.h > @@ -257,6 +257,10 @@ extern cl_buffer_unmap_cb *cl_buffer_unmap; > typedef int (cl_buffer_map_gtt_cb)(cl_buffer); > extern cl_buffer_map_gtt_cb *cl_buffer_map_gtt; > > +/* Map a buffer in the GTT domain, non waiting the GPU read or write*/ > +typedef int (cl_buffer_map_gtt_unsync_cb)(cl_buffer); > +extern cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync; > + > /* Unmap a buffer in the GTT domain */ > typedef int (cl_buffer_unmap_gtt_cb)(cl_buffer); > extern cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt; > diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c > index e7412de..7c4c866 100644 > --- 
a/src/cl_driver_defs.c > +++ b/src/cl_driver_defs.c > @@ -36,6 +36,7 @@ LOCAL cl_buffer_unreference_cb *cl_buffer_unreference = > NULL; > LOCAL cl_buffer_map_cb *cl_buffer_map = NULL; > LOCAL cl_buffer_unmap_cb *cl_buffer_unmap = NULL; > LOCAL cl_buffer_map_gtt_cb *cl_buffer_map_gtt = NULL; > +LOCAL cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync = NULL; > LOCAL cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt = NULL; > LOCAL cl_buffer_get_virtual_cb *cl_buffer_get_virtual = NULL; > LOCAL cl_buffer_get_size_cb *cl_buffer_get_size = NULL; > diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c > index a112cc4..a914e26 100644 > --- a/src/cl_enqueue.c > +++ b/src/cl_enqueue.c > @@ -156,93 +156,35 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data) > > void *ptr = NULL; > cl_int err = CL_SUCCESS; > - void *mem_ptr = NULL; > - cl_int slot = -1; > cl_mem buffer = data->mem_obj; > - > - if (!(ptr = cl_mem_map_auto(buffer))) { > + //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here > + if (!(ptr = cl_mem_map_gtt(buffer))) { > err = CL_MAP_FAILURE; > + goto error; > } > - > ptr = (char*)ptr + data->offset; > + assert(data->ptr == ptr); > > if(buffer->flags & CL_MEM_USE_HOST_PTR) { > assert(buffer->host_ptr); > memcpy(buffer->host_ptr + data->offset, ptr, data->size); > - mem_ptr = buffer->host_ptr + data->offset; > - } else { > - mem_ptr = ptr; > - } > - > - /* Record the mapped address. 
*/ > - if (!buffer->mapped_ptr_sz) { > - buffer->mapped_ptr_sz = 16; > - buffer->mapped_ptr = (cl_mapped_ptr *)malloc( > - sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz); > - if (!buffer->mapped_ptr) { > - cl_mem_unmap_auto (buffer); > - err = CL_OUT_OF_HOST_MEMORY; > - ptr = NULL; > - goto error; > - } > - > - memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * > sizeof(cl_mapped_ptr)); > - slot = 0; > - } else { > - int i = 0; > - for (; i < buffer->mapped_ptr_sz; i++) { > - if (buffer->mapped_ptr[i].ptr == NULL) { > - slot = i; > - break; > - } > - } > - > - if (i == buffer->mapped_ptr_sz) { > - cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc( > - sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2); > - if (!new_ptr) { > - cl_mem_unmap_auto (buffer); > - err = CL_OUT_OF_HOST_MEMORY; > - ptr = NULL; > - goto error; > - } > - memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); > - memcpy(new_ptr, buffer->mapped_ptr, > - buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); > - slot = buffer->mapped_ptr_sz; > - buffer->mapped_ptr_sz *= 2; > - free(buffer->mapped_ptr); > - buffer->mapped_ptr = new_ptr; > - } > } > > - assert(slot != -1); > - buffer->mapped_ptr[slot].ptr = mem_ptr; > - buffer->mapped_ptr[slot].v_ptr = ptr; > - buffer->mapped_ptr[slot].size = data->size; > - buffer->map_ref++; > - > - data->ptr = mem_ptr; > - > error: > return err; > } > > cl_int cl_enqueue_map_image(enqueue_data *data) > { > - void *ptr = NULL; > cl_int err = CL_SUCCESS; > - > cl_mem image = data->mem_obj; > - const size_t *origin = data->origin; > - > - if (!(ptr = cl_mem_map_auto(image))) { > + void *ptr = NULL; > + //because using unsync map in clEnqueueMapImage, so force use map_gtt here > + if (!(ptr = cl_mem_map_gtt(image))) { > err = CL_MAP_FAILURE; > goto error; > } > - > - size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + > image->slice_pitch*origin[2]; > - data->ptr = (char*)ptr + offset; > + assert(data->ptr == (char*)ptr + data->offset); > 
> error: > return err; > @@ -282,7 +224,7 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data) > assert(v_ptr == mapped_ptr); > } > > - cl_mem_unmap_auto(memobj); > + cl_mem_unmap_gtt(memobj); > > /* shrink the mapped slot. */ > if (memobj->mapped_ptr_sz/2 > memobj->map_ref) { > diff --git a/src/cl_mem.c b/src/cl_mem.c > index f794ce7..4819096 100644 > --- a/src/cl_mem.c > +++ b/src/cl_mem.c > @@ -552,6 +552,14 @@ cl_mem_map_gtt(cl_mem mem) > return cl_buffer_get_virtual(mem->bo); > } > > +LOCAL void * > +cl_mem_map_gtt_unsync(cl_mem mem) > +{ > + cl_buffer_map_gtt_unsync(mem->bo); > + assert(cl_buffer_get_virtual(mem->bo)); > + return cl_buffer_get_virtual(mem->bo); > +} > + > LOCAL cl_int > cl_mem_unmap_gtt(cl_mem mem) > { > diff --git a/src/cl_mem.h b/src/cl_mem.h > index 1b1709a..d25f0ed 100644 > --- a/src/cl_mem.h > +++ b/src/cl_mem.h > @@ -123,6 +123,9 @@ extern cl_int cl_mem_unmap(cl_mem); > /* Directly map a memory object in GTT mode */ > extern void *cl_mem_map_gtt(cl_mem); > > +/* Directly map a memory object in GTT mode, with out waiting gpu idle */ > +extern void *cl_mem_map_gtt_unsync(cl_mem); > + > /* Unmap a memory object in GTT mode */ > extern cl_int cl_mem_unmap_gtt(cl_mem); > > diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c > index 6c6b9fb..9959447 100644 > --- a/src/intel/intel_driver.c > +++ b/src/intel/intel_driver.c > @@ -519,6 +519,7 @@ intel_setup_callbacks(void) > cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap; > cl_buffer_map_gtt = (cl_buffer_map_gtt_cb *) drm_intel_gem_bo_map_gtt; > cl_buffer_unmap_gtt = (cl_buffer_unmap_gtt_cb *) > drm_intel_gem_bo_unmap_gtt; > + cl_buffer_map_gtt_unsync = (cl_buffer_map_gtt_unsync_cb *) > drm_intel_gem_bo_map_unsynchronized; > cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) > drm_intel_bo_get_virtual; > cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size; > cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin; > -- > 1.8.1.2 > > 
_______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
