LGTM, will push later. Thanks.
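
For reference, a minimal host-side sketch (not part of the patch) that exercises the >128M image path: it creates a 2D image above the 128M tiling threshold and maps it. The dimensions, format, device selection and error handling below are illustrative only.

/* Sketch only: map a 2D image larger than 128MB so that
 * _cl_mem_new_image() takes the new CL_NO_TILE fallback.
 * 8192 x 6144 RGBA8 = 192MB: above 128MB, below the 256MB limit. */
#include <stdio.h>
#include <CL/cl.h>

int main(void)
{
  cl_platform_id platform;
  cl_device_id device;
  cl_int err;

  clGetPlatformIDs(1, &platform, NULL);
  clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
  cl_context ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
  cl_command_queue queue = clCreateCommandQueue(ctx, device, 0, &err);

  /* OpenCL 1.1-style image creation, enough for a quick test. */
  cl_image_format fmt = { CL_RGBA, CL_UNSIGNED_INT8 };
  cl_mem image = clCreateImage2D(ctx, CL_MEM_READ_WRITE, &fmt,
                                 8192, 6144, 0, NULL, &err);
  if (err != CL_SUCCESS) { printf("create failed: %d\n", err); return 1; }

  size_t origin[3] = {0, 0, 0};
  size_t region[3] = {8192, 6144, 1};
  size_t row_pitch = 0, slice_pitch = 0;
  void *ptr = clEnqueueMapImage(queue, image, CL_TRUE,
                                CL_MAP_READ | CL_MAP_WRITE,
                                origin, region, &row_pitch, &slice_pitch,
                                0, NULL, NULL, &err);
  if (err != CL_SUCCESS) { printf("map failed: %d\n", err); return 1; }

  ((char*)ptr)[0] = 0x5a;   /* touch the mapping */
  clEnqueueUnmapMemObject(queue, image, ptr, 0, NULL, NULL);
  clFinish(queue);

  clReleaseMemObject(image);
  clReleaseCommandQueue(queue);
  clReleaseContext(ctx);
  return 0;
}

With the patch applied, _cl_mem_new_image() should fall back to CL_NO_TILE for this allocation and the map should succeed instead of failing in map_gtt.
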
On Tue, May 20, 2014 at 10:46:19AM +0800, Yang Rong wrote:
> After the max allocation size was changed to 256M, mapping a large memory
> object through the GTT can fail on some systems. So when the image size is
> larger than 128M, disable tiling and use a normal map. But in
> clEnqueueMapBuffer/Image the map may still fail, because of the unsync map.
> 
> Signed-off-by: Yang Rong <[email protected]>
> ---
>  src/cl_api.c     | 55 ++++++++++++++++++++++++++++++++-----------------------
>  src/cl_enqueue.c | 27 ++++++++++++++++++---------
>  src/cl_enqueue.h |  1 +
>  src/cl_mem.c     | 18 ++++++++++++++++--
>  src/cl_mem.h     |  1 +
>  5 files changed, 68 insertions(+), 34 deletions(-)
> 
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 03a1cda..0800c09 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -2161,7 +2161,7 @@ error:
>    return err;
>  }
>  
> -static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset, size_t size)
> +static cl_int _cl_map_mem(cl_mem mem, void *ptr, void **mem_ptr, size_t offset, size_t size)
>  {
>    cl_int slot = -1;
>    int err = CL_SUCCESS;
> @@ -2172,17 +2172,13 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
>      sub_offset = buffer->sub_offset;
>    }
>  
> -  if (!(*ptr = cl_mem_map_gtt_unsync(mem))) {
> -    err = CL_MAP_FAILURE;
> -    goto error;
> -  }
> -  *ptr = (char*)(*ptr) + offset + sub_offset;
> +  ptr = (char*)ptr + offset + sub_offset;
>    if(mem->flags & CL_MEM_USE_HOST_PTR) {
>      assert(mem->host_ptr);
>      //only calc ptr here, will do memcpy in enqueue
>      *mem_ptr = mem->host_ptr + offset + sub_offset;
>    } else {
> -    *mem_ptr = *ptr;
> +    *mem_ptr = ptr;
>    }
>    /* Record the mapped address. */
>    if (!mem->mapped_ptr_sz) {
> @@ -2190,7 +2186,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
>      mem->mapped_ptr = (cl_mapped_ptr *)malloc(
>            sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz);
>      if (!mem->mapped_ptr) {
> -      cl_mem_unmap_gtt(mem);
> +      cl_mem_unmap_auto(mem);
>        err = CL_OUT_OF_HOST_MEMORY;
>        goto error;
>      }
> @@ -2208,7 +2204,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
>      cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
>            sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2);
>      if (!new_ptr) {
> -      cl_mem_unmap_gtt (mem);
> +      cl_mem_unmap_auto(mem);
>        err = CL_OUT_OF_HOST_MEMORY;
>        goto error;
>      }
> @@ -2223,7 +2219,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
>    }
>    assert(slot != -1);
>    mem->mapped_ptr[slot].ptr = *mem_ptr;
> -  mem->mapped_ptr[slot].v_ptr = *ptr;
> +  mem->mapped_ptr[slot].v_ptr = ptr;
>    mem->mapped_ptr[slot].size = size;
>    mem->map_ref++;
>  error:
> @@ -2270,10 +2266,6 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
>      goto error;
>    }
>  
> -  err = _cl_map_mem(buffer, &ptr, &mem_ptr, offset, size);
> -  if (err != CL_SUCCESS)
> -    goto error;
> -
>    TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
>  
>    data = &no_wait_data;
> @@ -2282,12 +2274,25 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
>    data->offset      = offset;
>    data->size        = size;
>    data->ptr         = ptr;
> +  data->unsync_map  = 1;
>  
>    if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
>                     event, data, CL_COMMAND_MAP_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
> +    data->unsync_map = 0;
>      err = cl_enqueue_handle(event ? *event : NULL, data);
> +    if (err != CL_SUCCESS)
> +      goto error;
> +    ptr = data->ptr;
>      if(event) cl_event_set_status(*event, CL_COMPLETE);
> +  } else {
> +    if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
> +      err = CL_MAP_FAILURE;
> +      goto error;
> +    }
>    }
> +  err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size);
> +  if (err != CL_SUCCESS)
> +    goto error;
>  
>  error:
>    if (errcode_ret)
> @@ -2344,11 +2349,6 @@ clEnqueueMapImage(cl_command_queue command_queue,
>      goto error;
>    }
>  
> -  if (!(ptr = cl_mem_map_gtt_unsync(mem))) {
> -    err = CL_MAP_FAILURE;
> -    goto error;
> -  }
> -
>    size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
>    size_t size;
>    if(region[2] == 1) {
> @@ -2362,10 +2362,6 @@ clEnqueueMapImage(cl_command_queue command_queue,
>      size += image->bpp * (origin[0] + region[0]);
>    }
>  
> -  err = _cl_map_mem(mem, &ptr, &mem_ptr, offset, size);
> -  if (err != CL_SUCCESS)
> -    goto error;
> -
>    TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx);
>  
>    data = &no_wait_data;
> @@ -2378,12 +2374,25 @@ clEnqueueMapImage(cl_command_queue command_queue,
>    data->slice_pitch = *image_slice_pitch;
>    data->ptr         = ptr;
>    data->offset      = offset;
> +  data->unsync_map  = 1;
>  
>    if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
>                     event, data, CL_COMMAND_MAP_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
> +    data->unsync_map = 0;
>      err = cl_enqueue_handle(event ? *event : NULL, data);
> +    if (err != CL_SUCCESS)
> +      goto error;
> +    ptr = data->ptr;
>      if(event) cl_event_set_status(*event, CL_COMPLETE);
> +  } else {
> +    if ((ptr = cl_mem_map_gtt_unsync(mem)) == NULL) {
> +      err = CL_MAP_FAILURE;
> +      goto error;
> +    }
>    }
> +  err = _cl_map_mem(mem, ptr, &mem_ptr, offset, size);
> +  if (err != CL_SUCCESS)
> +    goto error;
>  
>  error:
>    if (errcode_ret)
> diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
> index 330d230..800668d 100644
> --- a/src/cl_enqueue.c
> +++ b/src/cl_enqueue.c
> @@ -246,17 +246,21 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
>                mem->type == CL_MEM_SUBBUFFER_TYPE);
>    struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
>  
> -  //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
> -  if (!(ptr = cl_mem_map_gtt(mem))) {
> +  if(data->unsync_map == 1)
> +    //because an unsync map was used in clEnqueueMapBuffer, force map_gtt here
> +    ptr = cl_mem_map_gtt(mem);
> +  else
> +    ptr = cl_mem_map_auto(mem);
> +
> +  if (ptr == NULL) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
> -
> -  ptr = (char*)ptr + data->offset + buffer->sub_offset;
> -  assert(data->ptr == ptr);
> +  data->ptr = ptr;
>  
>    if(mem->flags & CL_MEM_USE_HOST_PTR) {
>      assert(mem->host_ptr);
> +    ptr = (char*)ptr + data->offset + buffer->sub_offset;
>      memcpy(mem->host_ptr + data->offset, ptr, data->size);
>    }
>  
> @@ -271,12 +275,17 @@ cl_int cl_enqueue_map_image(enqueue_data *data)
>    void *ptr = NULL;
>    CHECK_IMAGE(mem, image);
>  
> -  if (!(ptr = cl_mem_map_gtt(mem))) {
> +  if(data->unsync_map == 1)
> +    //because an unsync map was used in clEnqueueMapImage, force map_gtt here
> +    ptr = cl_mem_map_gtt(mem);
> +  else
> +    ptr = cl_mem_map_auto(mem);
> +
> +  if (ptr == NULL) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
> -
> -  assert(data->ptr == (char*)ptr + data->offset);
> +  data->ptr = ptr;
>  
>    if(mem->flags & CL_MEM_USE_HOST_PTR) {
>      assert(mem->host_ptr);
> @@ -323,7 +332,7 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
>      assert(v_ptr == mapped_ptr);
>    }
>  
> -  cl_mem_unmap_gtt(memobj);
> +  cl_mem_unmap_auto(memobj);
>  
>    /* shrink the mapped slot. */
>    if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
> diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
> index 1d3ae5f..c7e33da 100644
> --- a/src/cl_enqueue.h
> +++ b/src/cl_enqueue.h
> @@ -60,6 +60,7 @@ typedef struct _enqueue_data {
>    const void *      const_ptr;      /* Const ptr for memory read */
>    void *            ptr;            /* Ptr for write and return value */
>    const cl_mem*     mem_list;       /* mem_list of clEnqueueNativeKernel */
> +  uint8_t           unsync_map;     /* Indicate the clEnqueueMapBuffer/Image is an unsync map */
>    void (*user_func)(void *);        /* pointer to a host-callable user function */
>  } enqueue_data;
>  
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 6b8ca7c..7092385 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -48,6 +48,8 @@
>  #define CL_MEM_OBJECT_IMAGE2D 0x10F1
>  #define CL_MEM_OBJECT_IMAGE3D 0x10F2
>  
> +#define MAX_TILING_SIZE 128 * MB
> +
>  static cl_mem_object_type
>  cl_get_mem_object_type(cl_mem mem)
>  {
> @@ -622,6 +624,15 @@ _cl_mem_new_image(cl_context ctx,
>  
>    sz = aligned_pitch * aligned_h * depth;
>  
> +  /* If sz is larger than 128MB, map gtt may fail on some systems.
> +     Because there is no obvious performance drop, disable tiling. */
> +  if(tiling != CL_NO_TILE && sz > MAX_TILING_SIZE) {
> +    tiling = CL_NO_TILE;
> +    aligned_pitch = w * bpp;
> +    aligned_h = h;
> +    sz = aligned_pitch * aligned_h * depth;
> +  }
> +
>    mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, &err);
>    if (mem == NULL || err != CL_SUCCESS)
>      goto error;
> @@ -714,7 +725,7 @@ cl_mem_delete(cl_mem mem)
>      for(i=0; i<mem->mapped_ptr_sz; i++) {
>        if(mem->mapped_ptr[i].ptr != NULL) {
>          mem->map_ref--;
> -        cl_mem_unmap_gtt(mem);
> +        cl_mem_unmap_auto(mem);
>        }
>      }
>      assert(mem->map_ref == 0);
> @@ -1326,6 +1337,7 @@ cl_mem_map_gtt(cl_mem mem)
>  {
>    cl_buffer_map_gtt(mem->bo);
>    assert(cl_buffer_get_virtual(mem->bo));
> +  mem->mapped_gtt = 1;
>    return cl_buffer_get_virtual(mem->bo);
>  }
>  
> @@ -1356,8 +1368,10 @@ cl_mem_map_auto(cl_mem mem)
>  LOCAL cl_int
>  cl_mem_unmap_auto(cl_mem mem)
>  {
> -  if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE)
> +  if (mem->mapped_gtt == 1) {
>      cl_buffer_unmap_gtt(mem->bo);
> +    mem->mapped_gtt = 0;
> +  }
>    else
>      cl_buffer_unmap(mem->bo);
>    return CL_SUCCESS;
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index 47a30dc..5719c60 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -87,6 +87,7 @@ typedef struct _cl_mem {
>    cl_mapped_ptr* mapped_ptr;  /* Store the mapped addresses and size by caller. */
>    int mapped_ptr_sz;          /* The array size of mapped_ptr. */
>    int map_ref;                /* The mapped count. */
> +  uint8_t mapped_gtt;         /* This object has mapped gtt, for unmap. */
>    cl_mem_dstr_cb *dstr_cb;    /* The destroy callback. */
>  } _cl_mem;
>  
> -- 
> 1.9.1

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet
