LGTM, thanks.
> -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Zhigang Gong > Sent: Thursday, July 03, 2014 2:15 PM > To: beignet@lists.freedesktop.org > Cc: Gong, Zhigang > Subject: [Beignet] [PATCH v2] runtime: fix a gpgpu event and thread local > gpgpu > handling bug. > > When pending a command queue, we need to record the whole gpgpu > structure not just the batch buffer. For the following reason: > > 1. We need to keep those private buffer, for example those printf buffers. > 2. We need to make sure this gpgpu will not be reused by other enqueuement. > > v2: > Don't try to flush all user event attached to the queue. > Just need to flush the current event when doing command queue flush. > > Signed-off-by: Zhigang Gong <zhigang.g...@intel.com> > --- > src/cl_api.c | 3 +- > src/cl_command_queue.c | 14 +++++++-- > src/cl_command_queue.h | 4 +++ > src/cl_driver.h | 8 ++---- > src/cl_driver_defs.c | 4 +-- > src/cl_enqueue.c | 2 +- > src/cl_event.c | 26 ++++++++++++++--- > src/cl_event.h | 7 +++-- > src/cl_thread.c | 20 +++++++++++++ > src/cl_thread.h | 3 ++ > src/intel/intel_batchbuffer.c | 13 --------- src/intel/intel_batchbuffer.h > | 1 - > src/intel/intel_gpgpu.c | 66 > +++++++++++++++++-------------------------- > 13 files changed, 97 insertions(+), 74 deletions(-) > > diff --git a/src/cl_api.c b/src/cl_api.c index d54ada6..8759027 100644 > --- a/src/cl_api.c > +++ b/src/cl_api.c > @@ -69,7 +69,7 @@ handle_events(cl_command_queue queue, cl_int num, > const cl_event *wait_list, > cl_event* event, enqueue_data* data, cl_command_type > type) { > cl_int status = cl_event_wait_events(num, wait_list, queue); > - cl_event e; > + cl_event e = NULL; > if(event != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) { > e = cl_event_new(queue->ctx, queue, type, event!=NULL); > > @@ -85,6 +85,7 @@ handle_events(cl_command_queue queue, cl_int num, > const cl_event *wait_list, > cl_event_new_enqueue_callback(e, data, num, wait_list); > } > } > + queue->current_event = e; > return status; > } > > diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index > 8426c4e..cd268aa 100644 > --- a/src/cl_command_queue.c > +++ b/src/cl_command_queue.c > @@ -28,6 +28,7 @@ > #include "cl_alloc.h" > #include "cl_driver.h" > #include "cl_khr_icd.h" > +#include "cl_event.h" > #include "performance.h" > > #include <assert.h> > @@ -421,10 +422,9 @@ error: > return err; > } > > -LOCAL cl_int > -cl_command_queue_flush(cl_command_queue queue) > +LOCAL void > +cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu > gpgpu) > { > - GET_QUEUE_THREAD_GPGPU(queue); > size_t global_wk_sz[3]; > void* printf_info = cl_gpgpu_get_printf_info(gpgpu, global_wk_sz); > > @@ -447,7 +447,15 @@ cl_command_queue_flush(cl_command_queue > queue) > global_wk_sz[0] = global_wk_sz[1] = global_wk_sz[2] = 0; > cl_gpgpu_set_printf_info(gpgpu, NULL, global_wk_sz); > } > +} > > +LOCAL cl_int > +cl_command_queue_flush(cl_command_queue queue) { > + GET_QUEUE_THREAD_GPGPU(queue); > + cl_command_queue_flush_gpgpu(queue, gpgpu); > + if (queue->current_event) > + cl_event_flush(queue->current_event); > cl_invalid_thread_gpgpu(queue); > return CL_SUCCESS; > } > diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h index > b79d63a..bd70f25 100644 > --- a/src/cl_command_queue.h > +++ b/src/cl_command_queue.h > @@ -41,6 +41,7 @@ struct _cl_command_queue { > cl_int wait_events_num; /* Number of Non-complete user > events */ > cl_int wait_events_size; /* The size of array that > wait_events point to */ > cl_event last_event; /* The last event in the queue, > for enqueue mark used */ > + cl_event current_event; /* Current event. */ > cl_command_queue_properties props; /* Queue properties */ > cl_command_queue prev, next; /* We chain the command > queues together */ > void *thread_data; /* Used to store thread context > data */ > @@ -82,6 +83,9 @@ cl_int > cl_command_queue_set_fulsim_buffer(cl_command_queue, cl_mem); > /* Flush for the command queue */ > extern cl_int cl_command_queue_flush(cl_command_queue); > > +/* Flush for the specified gpgpu */ > +extern void cl_command_queue_flush_gpgpu(cl_command_queue, cl_gpgpu); > + > /* Wait for the completion of the command queue */ extern cl_int > cl_command_queue_finish(cl_command_queue); > > diff --git a/src/cl_driver.h b/src/cl_driver.h index 2999eb7..3d1d8d8 100644 > --- a/src/cl_driver.h > +++ b/src/cl_driver.h > @@ -197,13 +197,9 @@ extern cl_gpgpu_event_new_cb > *cl_gpgpu_event_new; typedef int > (cl_gpgpu_event_update_status_cb)(cl_gpgpu_event, int); extern > cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status; > > -/* pending flush the batch buffer of this event */ -typedef void > (cl_gpgpu_event_pending_cb)(cl_gpgpu, cl_gpgpu_event); -extern > cl_gpgpu_event_pending_cb *cl_gpgpu_event_pending; > - > /* flush the batch buffer of this event */ -typedef void > (cl_gpgpu_event_resume_cb)(cl_gpgpu_event); > -extern cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume; > +typedef void (cl_gpgpu_event_flush_cb)(cl_gpgpu_event); > +extern cl_gpgpu_event_flush_cb *cl_gpgpu_event_flush; > > /* cancel exec batch buffer of this event */ typedef void > (cl_gpgpu_event_cancel_cb)(cl_gpgpu_event); > diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index > c9385c4..72f25d9 > 100644 > --- a/src/cl_driver_defs.c > +++ b/src/cl_driver_defs.c > @@ -79,9 +79,7 @@ LOCAL cl_gpgpu_walker_cb *cl_gpgpu_walker = NULL; > LOCAL cl_gpgpu_bind_sampler_cb *cl_gpgpu_bind_sampler = NULL; LOCAL > cl_gpgpu_event_new_cb *cl_gpgpu_event_new = NULL; LOCAL > cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status = NULL; > -LOCAL cl_gpgpu_event_pending_cb *cl_gpgpu_event_pending = NULL; -LOCAL > cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume = NULL; -LOCAL > cl_gpgpu_event_cancel_cb *cl_gpgpu_event_cancel = NULL; > +LOCAL cl_gpgpu_event_flush_cb *cl_gpgpu_event_flush = NULL; > LOCAL cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete = NULL; LOCAL > cl_gpgpu_event_get_exec_timestamp_cb > *cl_gpgpu_event_get_exec_timestamp = NULL; LOCAL > cl_gpgpu_event_get_gpu_cur_timestamp_cb > *cl_gpgpu_event_get_gpu_cur_timestamp = NULL; diff --git > a/src/cl_enqueue.c b/src/cl_enqueue.c index 7fa44ff..af118ad 100644 > --- a/src/cl_enqueue.c > +++ b/src/cl_enqueue.c > @@ -461,7 +461,7 @@ cl_int cl_enqueue_handle(cl_event event, > enqueue_data* data) > case EnqueueNDRangeKernel: > case EnqueueFillBuffer: > case EnqueueFillImage: > - cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); > + cl_event_flush(event); > return CL_SUCCESS; > case EnqueueNativeKernel: > return cl_enqueue_native_kernel(data); diff --git a/src/cl_event.c > b/src/cl_event.c index a3af59c..ec5cb68 100644 > --- a/src/cl_event.c > +++ b/src/cl_event.c > @@ -46,6 +46,17 @@ cl_event_is_gpu_command_type(cl_command_type > type) > } > } > > +void cl_event_flush(cl_event event) > +{ > + assert(event->gpgpu_event != NULL); > + if (event->gpgpu) { > + cl_command_queue_flush_gpgpu(event->queue, event->gpgpu); > + cl_gpgpu_delete(event->gpgpu); > + event->gpgpu = NULL; > + } > + cl_gpgpu_event_flush(event->gpgpu_event); > +} > + > cl_event cl_event_new(cl_context ctx, cl_command_queue queue, > cl_command_type type, cl_bool emplict) { > cl_event event = NULL; > @@ -138,6 +149,11 @@ void cl_event_delete(cl_event event) > pthread_mutex_unlock(&event->ctx->event_lock); > cl_context_delete(event->ctx); > > + if (event->gpgpu) { > + fprintf(stderr, "Warning: a event is deleted with a pending enqueued > task.\n"); > + cl_gpgpu_delete(event->gpgpu); > + event->gpgpu = NULL; > + } > cl_free(event); > } > > @@ -257,7 +273,6 @@ void cl_event_new_enqueue_callback(cl_event event, > cl_command_queue queue = event->queue; > cl_int i; > cl_int err = CL_SUCCESS; > - GET_QUEUE_THREAD_GPGPU(data->queue); > > /* Allocate and initialize the structure itself */ > TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback)); @@ -344,7 +359,7 > @@ void cl_event_new_enqueue_callback(cl_event event, > } > } > if(data->queue != NULL && event->gpgpu_event != NULL) { > - cl_gpgpu_event_pending(gpgpu, event->gpgpu_event); > + event->gpgpu = cl_thread_gpgpu_take(event->queue); > data->ptr = (void *)event->gpgpu_event; > } > cb->data = *data; > @@ -394,8 +409,11 @@ void cl_event_set_status(cl_event event, cl_int > status) > if(event->gpgpu_event) > cl_gpgpu_event_update_status(event->gpgpu_event, 1); //now > set complet, need refine > } else { > - if(event->gpgpu_event) > - cl_gpgpu_event_cancel(event->gpgpu_event); //Error cancel > the enqueue > + if(event->gpgpu_event) { > + // Error then cancel the enqueued event. > + cl_gpgpu_delete(event->gpgpu); > + event->gpgpu = NULL; > + } > } > > event->status = status; //Change the event status after enqueue > and befor unlock diff --git a/src/cl_event.h b/src/cl_event.h index > bd8a5c7..3c23d74 100644 > --- a/src/cl_event.h > +++ b/src/cl_event.h > @@ -63,6 +63,7 @@ struct _cl_event { > cl_command_queue queue; /* The command queue associated > with event */ > cl_command_type type; /* The command type associated > with event */ > cl_int status; /* The execution status */ > + cl_gpgpu gpgpu; /* Current gpgpu, owned by this > structure. */ > cl_gpgpu_event gpgpu_event; /* The event object communicate with > hardware */ > user_callback* user_cb; /* The event callback functions */ > enqueue_callback* enqueue_cb; /* This event's enqueue */ @@ -95,9 > +96,11 @@ cl_int cl_event_marker_with_wait_list(cl_command_queue, > cl_uint, const cl_event cl_int > cl_event_barrier_with_wait_list(cl_command_queue, cl_uint, const cl_event *, > cl_event*); > /* Do the event profiling */ > cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info > param_name); -/*insert the user event*/ > +/* insert the user event */ > cl_int cl_event_insert_user_event(user_event** p_u_ev, cl_event event); > -/*remove the user event*/ > +/* remove the user event */ > cl_int cl_event_remove_user_event(user_event** p_u_ev, cl_event event); > +/* flush the event's pending gpgpu batch buffer and notify driver this > +gpgpu event has been flushed. */ void cl_event_flush(cl_event event); > #endif /* __CL_EVENT_H__ */ > > diff --git a/src/cl_thread.c b/src/cl_thread.c index 4692ed7..5713d70 100644 > --- a/src/cl_thread.c > +++ b/src/cl_thread.c > @@ -209,6 +209,26 @@ void cl_invalid_thread_gpgpu(cl_command_queue > queue) > spec->valid = 0; > } > > +cl_gpgpu cl_thread_gpgpu_take(cl_command_queue queue) { > + queue_thread_private *thread_private = ((queue_thread_private > +*)(queue->thread_data)); > + thread_spec_data* spec = NULL; > + > + pthread_mutex_lock(&thread_private->thread_data_lock); > + spec = thread_private->threads_data[thread_id]; > + assert(spec); > + pthread_mutex_unlock(&thread_private->thread_data_lock); > + > + if (!spec->valid) > + return NULL; > + > + assert(spec->gpgpu); > + cl_gpgpu gpgpu = spec->gpgpu; > + spec->gpgpu = NULL; > + spec->valid = 0; > + return gpgpu; > +} > + > /* The destructor for clean the thread specific data. */ void > cl_thread_data_destroy(cl_command_queue queue) { diff --git > a/src/cl_thread.h b/src/cl_thread.h index bf855a2..ecc99ad 100644 > --- a/src/cl_thread.h > +++ b/src/cl_thread.h > @@ -41,4 +41,7 @@ void cl_set_thread_batch_buf(cl_command_queue > queue, void* buf); > /* Used to get the batch buffer of each thread. */ > void* cl_get_thread_batch_buf(cl_command_queue queue); > > +/* take current gpgpu from the thread gpgpu pool. */ cl_gpgpu > +cl_thread_gpgpu_take(cl_command_queue queue); > + > #endif /* __CL_THREAD_H__ */ > diff --git a/src/intel/intel_batchbuffer.c b/src/intel/intel_batchbuffer.c > index > d0da77a..7767db3 100644 > --- a/src/intel/intel_batchbuffer.c > +++ b/src/intel/intel_batchbuffer.c > @@ -185,16 +185,3 @@ intel_batchbuffer_delete(intel_batchbuffer_t *batch) > > cl_free(batch); > } > - > -LOCAL void > -intel_batchbuffer_take(intel_batchbuffer_t *from, intel_batchbuffer_t *to) -{ > - *to = *from; > - //Need not unreference the buffer, to will unreference it. > - from->buffer = NULL; > - from->map = NULL; > - from->ptr = NULL; > - from->size = 0; > - from->atomic = 0; > - from->enable_slm = 0; > -} > diff --git a/src/intel/intel_batchbuffer.h b/src/intel/intel_batchbuffer.h > index > c62043e..0c3bc13 100644 > --- a/src/intel/intel_batchbuffer.h > +++ b/src/intel/intel_batchbuffer.h > @@ -101,7 +101,6 @@ extern void intel_batchbuffer_init(intel_batchbuffer_t*, > struct intel_driver*); extern void > intel_batchbuffer_terminate(intel_batchbuffer_t*); > extern void intel_batchbuffer_flush(intel_batchbuffer_t*); > extern void intel_batchbuffer_reset(intel_batchbuffer_t*, size_t sz); -extern > void intel_batchbuffer_take(intel_batchbuffer_t*, intel_batchbuffer_t *); > > static INLINE uint32_t > intel_batchbuffer_space(const intel_batchbuffer_t *batch) diff --git > a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index a7c449d..d00bc83 > 100644 > --- a/src/intel/intel_gpgpu.c > +++ b/src/intel/intel_gpgpu.c > @@ -60,9 +60,8 @@ typedef struct surface_heap { } surface_heap_t; > > typedef struct intel_event { > - intel_batchbuffer_t *batch; > - drm_intel_bo* buffer; > - drm_intel_bo* ts_buf; > + drm_intel_bo *buffer; > + drm_intel_bo *ts_buf; > int status; > } intel_event_t; > > @@ -569,12 +568,19 @@ > intel_gpgpu_check_binded_buf_address(intel_gpgpu_t *gpgpu) } > > static void > +intel_gpgpu_flush_batch_buffer(intel_batchbuffer_t *batch) { > + assert(batch); > + intel_batchbuffer_emit_mi_flush(batch); > + intel_batchbuffer_flush(batch); > +} > + > +static void > intel_gpgpu_flush(intel_gpgpu_t *gpgpu) { > if (!gpgpu->batch || !gpgpu->batch->buffer) > return; > - intel_batchbuffer_emit_mi_flush(gpgpu->batch); > - intel_batchbuffer_flush(gpgpu->batch); > + intel_gpgpu_flush_batch_buffer(gpgpu->batch); > intel_gpgpu_check_binded_buf_address(gpgpu); > } > > @@ -1156,11 +1162,10 @@ intel_gpgpu_event_new(intel_gpgpu_t *gpgpu) > intel_event_t *event = NULL; > TRY_ALLOC_NO_ERR (event, CALLOC(intel_event_t)); > > - event->status = command_queued; > - event->batch = NULL; > event->buffer = gpgpu->batch->buffer; > - if(event->buffer != NULL) > + if (event->buffer) > drm_intel_bo_reference(event->buffer); > + event->status = command_queued; > > if(gpgpu->time_stamp_b.bo) { > event->ts_buf = gpgpu->time_stamp_b.bo; @@ -1175,6 +1180,17 @@ > error: > goto exit; > } > > +/* > + The upper layer already flushed the batch buffer, just update > + internal status to command_submitted. > +*/ > +static void > +intel_gpgpu_event_flush(intel_event_t *event) { > + assert(event->status == command_queued); > + event->status = command_running; > +} > + > static int > intel_gpgpu_event_update_status(intel_event_t *event, int wait) { @@ > -1182,7 +1198,7 @@ intel_gpgpu_event_update_status(intel_event_t *event, > int wait) > return event->status; > > if (event->buffer && > - event->batch == NULL && //have flushed > + event->status == command_running && > !drm_intel_bo_busy(event->buffer)) { > event->status = command_complete; > drm_intel_bo_unreference(event->buffer); > @@ -1203,36 +1219,8 @@ intel_gpgpu_event_update_status(intel_event_t > *event, int wait) } > > static void > -intel_gpgpu_event_pending(intel_gpgpu_t *gpgpu, intel_event_t *event) -{ > - assert(event->buffer); //This is gpu enqueue command > - assert(event->batch == NULL); //This command haven't pengding. > - event->batch = intel_batchbuffer_new(gpgpu->drv); > - assert(event->batch); > - intel_batchbuffer_take(gpgpu->batch, event->batch); -} > - > -static void > -intel_gpgpu_event_resume(intel_event_t *event) -{ > - assert(event->batch); //This command have pending. > - intel_batchbuffer_flush(event->batch); > - intel_batchbuffer_delete(event->batch); > - event->batch = NULL; > -} > - > -static void > -intel_gpgpu_event_cancel(intel_event_t *event) -{ > - assert(event->batch); //This command have pending. > - intel_batchbuffer_delete(event->batch); > - event->batch = NULL; > -} > - > -static void > intel_gpgpu_event_delete(intel_event_t *event) { > - assert(event->batch == NULL); //This command must have been flushed. > if(event->buffer) > drm_intel_bo_unreference(event->buffer); > if(event->ts_buf) > @@ -1412,10 +1400,8 @@ intel_set_gpgpu_callbacks(int device_id) > cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) > intel_gpgpu_bind_sampler; > cl_gpgpu_set_scratch = (cl_gpgpu_set_scratch_cb *) > intel_gpgpu_set_scratch; > cl_gpgpu_event_new = (cl_gpgpu_event_new_cb > *)intel_gpgpu_event_new; > + cl_gpgpu_event_flush = (cl_gpgpu_event_flush_cb > + *)intel_gpgpu_event_flush; > cl_gpgpu_event_update_status = (cl_gpgpu_event_update_status_cb > *)intel_gpgpu_event_update_status; > - cl_gpgpu_event_pending = (cl_gpgpu_event_pending_cb > *)intel_gpgpu_event_pending; > - cl_gpgpu_event_resume = (cl_gpgpu_event_resume_cb > *)intel_gpgpu_event_resume; > - cl_gpgpu_event_cancel = (cl_gpgpu_event_cancel_cb > *)intel_gpgpu_event_cancel; > cl_gpgpu_event_delete = (cl_gpgpu_event_delete_cb > *)intel_gpgpu_event_delete; > cl_gpgpu_event_get_exec_timestamp = > (cl_gpgpu_event_get_exec_timestamp_cb > *)intel_gpgpu_event_get_exec_timestamp; > cl_gpgpu_event_get_gpu_cur_timestamp = > (cl_gpgpu_event_get_gpu_cur_timestamp_cb > *)intel_gpgpu_event_get_gpu_cur_timestamp; > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet