I believe this is caused by the order in which the event timestamps are recorded. We submit the NDRange first and only then record the queued timestamp, which is wrong. I have already sent another patch, "Improve event execute function", to fix this. Please give it a try by applying that patch on top of this one. Thanks.
On Thu, Dec 22, 2016 at 06:41:49AM +0000, Pan, Xiuli wrote: > Date: Thu, 22 Dec 2016 06:41:49 +0000 > From: "Pan, Xiuli" <xiuli....@intel.com> > To: "junyan...@inbox.com" <junyan...@inbox.com>, > "beignet@lists.freedesktop.org" <beignet@lists.freedesktop.org> > Subject: Re: [Beignet] [PATCH V4] Add profiling feature based on new event > implementation. > > It seems still have bugs. > Here are some logs I got, the gen timestamps is print in the function > cl_event_update_timestamp_gen, and the final result is print last as > timestamp. > > gen timestamp[0] is d88bddb30 > gen timestamp[1] is d88bde2b0 > run for 8 times > gen timestamp[2] is d88bddae0 // It is smaller than timestamp[0] we get some > negative value > gen timestamp[3] is d8f002390 > timestamp[2] is ffffffffffffffaf > timestamp[3] is 642485f > gen timestamp[0] is d8f03fab0 > gen timestamp[1] is d8f0400f0 > run for 9 times > gen timestamp[2] is d8f03fd30 > gen timestamp[3] is d954687d0 > timestamp[2] is 27f > timestamp[3] is 6428d1f > gen timestamp[0] is d954a9d20 > gen timestamp[1] is d954aa450 > run for 10 times > gen timestamp[2] is d954a9d20 //It is the same as timestamp[0] we get -1 > gen timestamp[3] is d9b8df420 > timestamp[2] is ffffffffffffffff > > > The overflow handler seems to have some problems. > > -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > junyan...@inbox.com > Sent: Monday, December 19, 2016 7:24 PM > To: beignet@lists.freedesktop.org > Subject: [Beignet] [PATCH V4] Add profiling feature based on new event > implementation. > > From: Junyan He <junyan...@intel.com> > > TODO: > In opencl 2.0, a new profiling item called CL_PROFILING_COMMAND_COMPLETE is > imported. It means that we need to record the time stamp of all the child > events created by the "Kernel enqueing kernels" feature finish. > This should be done after the "Kernel enqueing kernels" feature enabled. 
> > V2: > Update event time stamp before inserting to queue thread, avoid MT issue. > > V3: > Fixup overflow problem. > > V4: > Fixup overflow to 0xfffffffffffffffff problem. > Just take ownership and release event lock when call the update timestamp > function. The update timestamp function may have block system call can should > not hold the lock to call it. > > Signed-off-by: Junyan He <junyan...@intel.com> > --- > src/cl_api.c | 51 ---------------- > src/cl_api_event.c | 41 +++++++++++++ > src/cl_api_mem.c | 9 +++ > src/cl_base_object.c | 29 ++++++--- > src/cl_base_object.h | 10 ++-- > src/cl_command_queue_enqueue.c | 2 + > src/cl_driver.h | 4 +- > src/cl_enqueue.c | 9 --- > src/cl_event.c | 132 > ++++++++++++++++++++++++++++++++--------- > src/cl_event.h | 10 ++-- > src/intel/intel_gpgpu.c | 16 +++-- > 11 files changed, 195 insertions(+), 118 deletions(-) > > diff --git a/src/cl_api.c b/src/cl_api.c index d7b5434..6a4f4ec 100644 > --- a/src/cl_api.c > +++ b/src/cl_api.c > @@ -1312,57 +1312,6 @@ error: > return err; > } > > - > -cl_int > -clGetEventProfilingInfo(cl_event event, > - cl_profiling_info param_name, > - size_t param_value_size, > - void * param_value, > - size_t * param_value_size_ret) > -{ > - cl_int err = CL_SUCCESS; > - cl_ulong ret_val; > - > - CHECK_EVENT(event); > - //cl_event_update_status(event, 0); > - > - if (event->event_type == CL_COMMAND_USER || > - !(event->queue->props & CL_QUEUE_PROFILING_ENABLE) || > - event->status != CL_COMPLETE) { > - err = CL_PROFILING_INFO_NOT_AVAILABLE; > - goto error; > - } > - > - if (param_value && param_value_size < sizeof(cl_ulong)) { > - err = CL_INVALID_VALUE; > - goto error; > - } > - > - if (param_name == CL_PROFILING_COMMAND_QUEUED) { > - ret_val = event->queued_timestamp; > - } else if (param_name == CL_PROFILING_COMMAND_SUBMIT) { > - ret_val= event->queued_timestamp + > cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[1]); > - } else if (param_name == CL_PROFILING_COMMAND_START) { 
> - err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_START); > - ret_val = event->queued_timestamp + cl_event_get_start_timestamp(event); > - } else if (param_name == CL_PROFILING_COMMAND_END) { > - err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_END); > - ret_val = event->queued_timestamp + cl_event_get_end_timestamp(event); > - } else { > - err = CL_INVALID_VALUE; > - goto error; > - } > - > - if (err == CL_SUCCESS) { > - if (param_value) > - *(cl_ulong*)param_value = ret_val; > - if (param_value_size_ret) > - *param_value_size_ret = sizeof(cl_ulong); > - } > -error: > - return err; > -} > - > cl_mem clCreatePipe (cl_context context, > cl_mem_flags flags, > cl_uint pipe_packet_size, diff --git > a/src/cl_api_event.c b/src/cl_api_event.c index 8f2b8e0..af1442a 100644 > --- a/src/cl_api_event.c > +++ b/src/cl_api_event.c > @@ -290,3 +290,44 @@ clGetEventInfo(cl_event event, > return cl_get_info_helper(src_ptr, src_size, > param_value, param_value_size, > param_value_size_ret); } > + > +cl_int > +clGetEventProfilingInfo(cl_event event, > + cl_profiling_info param_name, > + size_t param_value_size, > + void *param_value, > + size_t *param_value_size_ret) { > + cl_ulong ret_val; > + > + if (!CL_OBJECT_IS_EVENT(event)) { > + return CL_INVALID_EVENT; > + } > + > + assert(event->event_type == CL_COMMAND_USER || event->queue != NULL); > + if (event->event_type == CL_COMMAND_USER || > + !(event->queue->props & CL_QUEUE_PROFILING_ENABLE) || > + cl_event_get_status(event) != CL_COMPLETE) { > + return CL_PROFILING_INFO_NOT_AVAILABLE; } > + > + if (param_value && param_value_size < sizeof(cl_ulong)) { > + return CL_INVALID_VALUE; > + } > + > + if (param_name < CL_PROFILING_COMMAND_QUEUED || > + param_name > CL_PROFILING_COMMAND_COMPLETE) { > + return CL_INVALID_VALUE; > + } > + > + ret_val = event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED]; > + if (ret_val == CL_EVENT_INVALID_TIMESTAMP) { > + return CL_INVALID_VALUE; > + } > + > + if (param_value) > 
+ *(cl_ulong *)param_value = ret_val; > + if (param_value_size_ret) > + *param_value_size_ret = sizeof(cl_ulong); > + return CL_SUCCESS; > +} > diff --git a/src/cl_api_mem.c b/src/cl_api_mem.c index 07be706..de18684 100644 > --- a/src/cl_api_mem.c > +++ b/src/cl_api_mem.c > @@ -318,6 +318,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue, > > ptr = data->ptr; > e->status = CL_COMPLETE; // Just set the status, no notify. No one > depend on us now. > + cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE); > } else { > err = cl_enqueue_handle(data, CL_SUBMITTED); // Submit to get the > address. > if (err != CL_SUCCESS) { > @@ -410,6 +411,7 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue, > } > > e->status = CL_COMPLETE; > + cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE); > } else { // May need to wait some event to complete. > cl_command_queue_enqueue_event(command_queue, e); > } > @@ -513,6 +515,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue, > } > > e->status = CL_COMPLETE; // Just set the status, no notify. No one > depend on us now. > + cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE); > } else { > cl_command_queue_enqueue_event(command_queue, e); > } > @@ -616,6 +619,7 @@ clEnqueueWriteBuffer(cl_command_queue command_queue, > } > > e->status = CL_COMPLETE; // Just set the status, no notify. No one > depend on us now. > + cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE); > } else { > cl_command_queue_enqueue_event(command_queue, e); > } > @@ -765,6 +769,7 @@ clEnqueueReadBufferRect(cl_command_queue command_queue, > } > > e->status = CL_COMPLETE; // Just set the status, no notify. No one > depend on us now. > + cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE); > } else { > cl_command_queue_enqueue_event(command_queue, e); > } > @@ -916,6 +921,7 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue, > } > > e->status = CL_COMPLETE; // Just set the status, no notify. No one > depend on us now. 
> + cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE); > } else { > cl_command_queue_enqueue_event(command_queue, e); > } > @@ -1601,6 +1607,7 @@ clEnqueueMapImage(cl_command_queue command_queue, > > ptr = data->ptr; > e->status = CL_COMPLETE; // Just set the status, no notify. No one > depend on us now. > + cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE); > } else { > err = cl_enqueue_handle(data, CL_SUBMITTED); // Submit to get the > address. > if (err != CL_SUCCESS) { > @@ -1798,6 +1805,7 @@ clEnqueueReadImage(cl_command_queue command_queue, > } > > e->status = CL_COMPLETE; // Just set the status, no notify. No one > depend on us now. > + cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE); > } else { > cl_command_queue_enqueue_event(command_queue, e); > } > @@ -1950,6 +1958,7 @@ clEnqueueWriteImage(cl_command_queue command_queue, > } > > e->status = CL_COMPLETE; // Just set the status, no notify. No one > depend on us now. > + cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE); > } else { > cl_command_queue_enqueue_event(command_queue, e); > } > diff --git a/src/cl_base_object.c b/src/cl_base_object.c index > 00c4b35..cbbd872 100644 > --- a/src/cl_base_object.c > +++ b/src/cl_base_object.c > @@ -66,7 +66,7 @@ cl_object_destroy_base(cl_base_object obj) } > > LOCAL cl_int > -cl_object_take_ownership(cl_base_object obj, cl_int wait) > +cl_object_take_ownership(cl_base_object obj, cl_int wait, cl_bool > +withlock) > { > pthread_t self; > > @@ -74,21 +74,26 @@ cl_object_take_ownership(cl_base_object obj, cl_int wait) > > self = pthread_self(); > > - pthread_mutex_lock(&obj->mutex); > + if (withlock == CL_FALSE) > + pthread_mutex_lock(&obj->mutex); > > if (pthread_equal(obj->owner, self)) { // Already get > - pthread_mutex_unlock(&obj->mutex); > + if (withlock == CL_FALSE) > + pthread_mutex_unlock(&obj->mutex); > return 1; > } > > if (pthread_equal(obj->owner, invalid_thread_id)) { > obj->owner = self; > - pthread_mutex_unlock(&obj->mutex); > + > + if 
(withlock == CL_FALSE) > + pthread_mutex_unlock(&obj->mutex); > return 1; > } > > if (wait == 0) { > - pthread_mutex_unlock(&obj->mutex); > + if (withlock == CL_FALSE) > + pthread_mutex_unlock(&obj->mutex); > return 0; > } > > @@ -97,21 +102,27 @@ cl_object_take_ownership(cl_base_object obj, cl_int wait) > } > > obj->owner = self; > - pthread_mutex_unlock(&obj->mutex); > + > + if (withlock == CL_FALSE) > + pthread_mutex_unlock(&obj->mutex); > + > return 1; > } > > LOCAL void > -cl_object_release_ownership(cl_base_object obj) > +cl_object_release_ownership(cl_base_object obj, cl_bool withlock) > { > assert(CL_OBJECT_IS_VALID(obj)); > > - pthread_mutex_lock(&obj->mutex); > + if (withlock == CL_FALSE) > + pthread_mutex_lock(&obj->mutex); > + > assert(pthread_equal(pthread_self(), obj->owner)); > obj->owner = invalid_thread_id; > pthread_cond_broadcast(&obj->cond); > > - pthread_mutex_unlock(&obj->mutex); > + if (withlock == CL_FALSE) > + pthread_mutex_unlock(&obj->mutex); > } > > LOCAL void > diff --git a/src/cl_base_object.h b/src/cl_base_object.h index > 4e643df..9d9dd94 100644 > --- a/src/cl_base_object.h > +++ b/src/cl_base_object.h > @@ -67,15 +67,17 @@ typedef struct _cl_base_object *cl_base_object; > > extern void cl_object_init_base(cl_base_object obj, cl_ulong magic); extern > void cl_object_destroy_base(cl_base_object obj); -extern cl_int > cl_object_take_ownership(cl_base_object obj, cl_int wait); -extern void > cl_object_release_ownership(cl_base_object obj); > +extern cl_int cl_object_take_ownership(cl_base_object obj, cl_int wait, > +cl_bool withlock); extern void > +cl_object_release_ownership(cl_base_object obj, cl_bool withlock); > extern void cl_object_wait_on_cond(cl_base_object obj); extern void > cl_object_notify_cond(cl_base_object obj); > > #define CL_OBJECT_INIT_BASE(obj, magic) > (cl_object_init_base((cl_base_object)obj, magic)) #define > CL_OBJECT_DESTROY_BASE(obj) (cl_object_destroy_base((cl_base_object)obj)) > -#define 
CL_OBJECT_TAKE_OWNERSHIP(obj, wait) > (cl_object_take_ownership((cl_base_object)obj, wait)) -#define > CL_OBJECT_RELEASE_OWNERSHIP(obj) > (cl_object_release_ownership((cl_base_object)obj)) > +#define CL_OBJECT_TAKE_OWNERSHIP(obj, wait) > +(cl_object_take_ownership((cl_base_object)obj, wait, CL_FALSE)) #define > +CL_OBJECT_RELEASE_OWNERSHIP(obj) > +(cl_object_release_ownership((cl_base_object)obj, CL_FALSE)) #define > +CL_OBJECT_TAKE_OWNERSHIP_WITHLOCK(obj, wait) > +(cl_object_take_ownership((cl_base_object)obj, wait, CL_TRUE)) #define > +CL_OBJECT_RELEASE_OWNERSHIP_WITHLOCK(obj) > +(cl_object_release_ownership((cl_base_object)obj, CL_TRUE)) > #define CL_OBJECT_WAIT_ON_COND(obj) > (cl_object_wait_on_cond((cl_base_object)obj)) > #define CL_OBJECT_NOTIFY_COND(obj) > (cl_object_notify_cond((cl_base_object)obj)) > > diff --git a/src/cl_command_queue_enqueue.c b/src/cl_command_queue_enqueue.c > index 32545b3..cf9ee3f 100644 > --- a/src/cl_command_queue_enqueue.c > +++ b/src/cl_command_queue_enqueue.c > @@ -135,6 +135,8 @@ LOCAL void > cl_command_queue_enqueue_event(cl_command_queue queue, cl_event event) { > CL_OBJECT_INC_REF(event); > + cl_event_update_timestamp(event, CL_QUEUED, event->status); > + > assert(CL_OBJECT_IS_COMMAND_QUEUE(queue)); > CL_OBJECT_LOCK(queue); > assert(queue->worker.quit == CL_FALSE); diff --git a/src/cl_driver.h > b/src/cl_driver.h index b45e2fb..9b2fd32 100644 > --- a/src/cl_driver.h > +++ b/src/cl_driver.h > @@ -262,11 +262,11 @@ typedef void (cl_gpgpu_event_delete_cb)(cl_gpgpu_event); > extern cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete; > > /* Get a event time stamp */ > -typedef void (cl_gpgpu_event_get_exec_timestamp_cb)(cl_gpgpu, > cl_gpgpu_event, int, uint64_t*); > +typedef void (cl_gpgpu_event_get_exec_timestamp_cb)(cl_gpgpu, int, > +uint64_t*); > extern cl_gpgpu_event_get_exec_timestamp_cb > *cl_gpgpu_event_get_exec_timestamp; > > /* Get current GPU time stamp */ > -typedef void (cl_gpgpu_event_get_gpu_cur_timestamp_cb)(cl_gpgpu, 
uint64_t*); > +typedef void (cl_gpgpu_event_get_gpu_cur_timestamp_cb)(cl_driver, > +uint64_t*); > extern cl_gpgpu_event_get_gpu_cur_timestamp_cb > *cl_gpgpu_event_get_gpu_cur_timestamp; > > /* Get current batch buffer handle */ > diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index fbcd7b6..59605f9 100644 > --- a/src/cl_enqueue.c > +++ b/src/cl_enqueue.c > @@ -572,9 +572,6 @@ cl_enqueue_ndrange(enqueue_data *data, cl_int status) > void *batch_buf = cl_gpgpu_ref_batch_buf(data->gpgpu); > cl_gpgpu_sync(batch_buf); > cl_gpgpu_unref_batch_buf(batch_buf); > - /* Finished, we can release the gpgpu now. */ > - cl_gpgpu_delete(data->gpgpu); > - data->gpgpu = NULL; > } > > return err; > @@ -626,12 +623,6 @@ cl_enqueue_delete(enqueue_data *data) LOCAL cl_int > cl_enqueue_handle(enqueue_data *data, cl_int status) { > - /* if need profiling, add the submit timestamp here. */ > - // if (event && event->event_type != CL_COMMAND_USER && > - // event->queue->props & CL_QUEUE_PROFILING_ENABLE) { > - // cl_event_get_timestamp(event, CL_PROFILING_COMMAND_SUBMIT); > - // } > - > switch (data->type) { > case EnqueueReturnSuccesss: > return CL_SUCCESS; > diff --git a/src/cl_event.c b/src/cl_event.c index 0804dbd..212f184 100644 > --- a/src/cl_event.c > +++ b/src/cl_event.c > @@ -23,46 +23,107 @@ > #include <string.h> > #include <stdio.h> > > -LOCAL cl_int > -cl_event_get_timestamp(cl_event event, cl_profiling_info param_name) > +// TODO: Need to move it to some device related file later. 
> +static void > +cl_event_update_timestamp_gen(cl_event event, cl_int status) > { > - // TODO: > - return CL_INVALID_VALUE; > + cl_ulong ts = 0; > + > + if ((event->exec_data.type == EnqueueCopyBufferRect) || > + (event->exec_data.type == EnqueueCopyBuffer) || > + (event->exec_data.type == EnqueueCopyImage) || > + (event->exec_data.type == EnqueueCopyBufferToImage) || > + (event->exec_data.type == EnqueueCopyImageToBuffer) || > + (event->exec_data.type == EnqueueNDRangeKernel) || > + (event->exec_data.type == EnqueueFillBuffer) || > + (event->exec_data.type == EnqueueFillImage)) { > + > + if (status == CL_QUEUED || status == CL_SUBMITTED) { > + cl_gpgpu_event_get_gpu_cur_timestamp(event->queue->ctx->drv, > + &ts); > + > + if (ts == CL_EVENT_INVALID_TIMESTAMP) > + ts++; > + event->timestamp[CL_QUEUED - status] = ts; > + return; > + } else if (status == CL_RUNNING) { > + assert(event->exec_data.gpgpu); > + return; // Wait for the event complete and get run and complete then. > + } else { > + assert(event->exec_data.gpgpu); > + cl_gpgpu_event_get_exec_timestamp(event->exec_data.gpgpu, 0, &ts); > + if (ts == CL_EVENT_INVALID_TIMESTAMP) > + ts++; > + event->timestamp[2] = ts; > + cl_gpgpu_event_get_exec_timestamp(event->exec_data.gpgpu, 1, &ts); > + if (ts == CL_EVENT_INVALID_TIMESTAMP) > + ts++; > + event->timestamp[3] = ts; > + return; > + } > + } else { > + cl_gpgpu_event_get_gpu_cur_timestamp(event->queue->ctx->drv, &ts); > + if (ts == CL_EVENT_INVALID_TIMESTAMP) > + ts++; > + event->timestamp[CL_QUEUED - status] = ts; > + return; > + } > } > > -LOCAL cl_ulong > -cl_event_get_timestamp_delta(cl_ulong start_timestamp, cl_ulong > end_timestamp) > +LOCAL void > +cl_event_update_timestamp(cl_event event, cl_int from, cl_int to) > { > - cl_ulong ret_val; > + int i; > + cl_bool re_cal = CL_FALSE; > + cl_ulong ts[4]; > > - if (end_timestamp > start_timestamp) { > - ret_val = end_timestamp - start_timestamp; > - } else { > - /*if start time stamp is greater than end 
timstamp then set ret value to > max*/ > - ret_val = ((cl_ulong)1 << 32); > - } > + assert(from >= to); > + assert(from >= CL_COMPLETE || from <= CL_QUEUED); assert(to >= > + CL_COMPLETE || to <= CL_QUEUED); > > - return ret_val; > -} > + if (event->event_type == CL_COMMAND_USER) > + return; > > -LOCAL cl_ulong > -cl_event_get_start_timestamp(cl_event event) -{ > - cl_ulong ret_val; > + assert(event->queue); > + if ((event->queue->props & CL_QUEUE_PROFILING_ENABLE) == 0) > + return; > > - ret_val = cl_event_get_timestamp_delta(event->timestamp[0], > event->timestamp[2]); > + i = CL_QUEUED - from; > + if (event->timestamp[i] == CL_EVENT_INVALID_TIMESTAMP) > + cl_event_update_timestamp_gen(event, from); i++; > > - return ret_val; > -} > + for (; i <= CL_QUEUED - to; i++) { > + cl_event_update_timestamp_gen(event, CL_QUEUED - i); } > > -LOCAL cl_ulong > -cl_event_get_end_timestamp(cl_event event) -{ > - cl_ulong ret_val; > + if (to == CL_COMPLETE) { > + // TODO: Need to set the CL_PROFILING_COMMAND_COMPLETE when enable child > enqueue. > + // Just a duplicate of event complete time now. > + event->timestamp[4] = event->timestamp[3]; > > - ret_val = cl_event_get_timestamp_delta(event->timestamp[0], > event->timestamp[3]); > + /* If timestamp overflow, set queued time to 0 and re-calculate. 
*/ > + for (i = 0; i < 4; i++) { > + if (event->timestamp[i + 1] < event->timestamp[i]) { > + re_cal = CL_TRUE; > + break; > + } > + } > + > + if (re_cal) { > + for (i = 3; i >= 0; i--) { > + if (event->timestamp[i + 1] < event->timestamp[i]) { //overflow > + ts[i] = event->timestamp[i + 1] + (CL_EVENT_INVALID_TIMESTAMP - > event->timestamp[i]); > + } else { > + ts[i] = event->timestamp[i + 1] - event->timestamp[i]; > + } > + } > > - return ret_val; > + event->timestamp[0] = 0; > + for (i = 1; i < 5; i++) { > + event->timestamp[i] = event->timestamp[i - 1] + ts[i - 1]; > + } > + } > + } > } > > LOCAL void > @@ -88,6 +149,7 @@ static cl_event > cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type, > cl_uint num_events, cl_event *event_list) { > + int i; > cl_event e = cl_calloc(1, sizeof(_cl_event)); > if (e == NULL) > return NULL; > @@ -115,6 +177,10 @@ cl_event_new(cl_context ctx, cl_command_queue queue, > cl_command_type type, > > e->depend_events = event_list; > e->depend_event_num = num_events; > + for (i = 0; i < 4; i++) { > + e->timestamp[i] = CL_EVENT_INVALID_TIMESTAMP; } > + > return e; > } > > @@ -317,6 +383,16 @@ cl_event_set_status(cl_event event, cl_int status) > return CL_INVALID_OPERATION; > } > > + if (status >= CL_COMPLETE && !CL_EVENT_IS_USER(event) && > + (event->queue->props & CL_QUEUE_PROFILING_ENABLE) != 0) { > + // Call update_timestamp without event lock. > + CL_OBJECT_TAKE_OWNERSHIP_WITHLOCK(event, 1); > + CL_OBJECT_UNLOCK(event); > + cl_event_update_timestamp(event, event->status, status); > + CL_OBJECT_LOCK(event); > + CL_OBJECT_RELEASE_OWNERSHIP_WITHLOCK(event); > + } > + > event->status = status; > > /* Call all the callbacks. */ > diff --git a/src/cl_event.h b/src/cl_event.h index f67299c..9df5ab6 100644 > --- a/src/cl_event.h > +++ b/src/cl_event.h > @@ -48,8 +48,7 @@ typedef struct _cl_event { > cl_uint depend_event_num; /* The depend events number. 
*/ > list_head callbacks; /* The events The event callback functions */ > list_head enqueue_node; /* The node in the enqueue list. */ > - cl_ulong timestamp[4]; /* The time stamps for profiling. */ > - cl_ulong queued_timestamp; > + cl_ulong timestamp[5]; /* The time stamps for profiling. */ > enqueue_data exec_data; /* Context for execute this event. */ } _cl_event; > > @@ -62,6 +61,8 @@ typedef struct _cl_event { #define CL_EVENT_IS_BARRIER(E) > (E->event_type == CL_COMMAND_BARRIER) #define CL_EVENT_IS_USER(E) > (E->event_type == CL_COMMAND_USER) > > +#define CL_EVENT_INVALID_TIMESTAMP 0xFFFFFFFFFFFFFFFF > + > /* Create a new event object */ > extern cl_event cl_event_create(cl_context ctx, cl_command_queue queue, > cl_uint num_events, > const cl_event *event_list, cl_command_type > type, cl_int *errcode_ret); @@ -78,11 +79,8 @@ extern cl_int > cl_event_set_callback(cl_event event, cl_int exec_type, > cl_event_notify_cb pfn_notify, void > *user_data); extern cl_int cl_event_wait_for_events_list(cl_uint num_events, > const cl_event *event_list); extern cl_int > cl_event_wait_for_event_ready(cl_event event); -extern cl_ulong > cl_event_get_timestamp_delta(cl_ulong start_timestamp, cl_ulong > end_timestamp); -extern cl_ulong cl_event_get_start_timestamp(cl_event > event); -extern cl_ulong cl_event_get_end_timestamp(cl_event event); -extern > cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name); > extern cl_event cl_event_create_marker_or_barrier(cl_command_queue queue, > cl_uint num_events_in_wait_list, > const cl_event > *event_wait_list, cl_bool is_barrier, > cl_int* error); > +extern void cl_event_update_timestamp(cl_event event, cl_int > +from_status, cl_int to_status); > #endif /* __CL_EVENT_H__ */ > diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index > d56d35d..e35b9d1 100644 > --- a/src/intel/intel_gpgpu.c > +++ b/src/intel/intel_gpgpu.c > @@ -2277,10 +2277,10 @@ intel_gpgpu_read_ts_reg_baytrail(drm_intel_bufmgr > 
*bufmgr) > > /* We want to get the current time of GPU. */ static void > -intel_gpgpu_event_get_gpu_cur_timestamp(intel_gpgpu_t* gpgpu, uint64_t* > ret_ts) > +intel_gpgpu_event_get_gpu_cur_timestamp(intel_driver_t* gen_driver, > +uint64_t* ret_ts) > { > uint64_t result = 0; > - drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr; > + drm_intel_bufmgr *bufmgr = gen_driver->bufmgr; > > /* Get the ts that match the bspec */ > result = intel_gpgpu_read_ts_reg(bufmgr); @@ -2292,15 +2292,13 @@ > intel_gpgpu_event_get_gpu_cur_timestamp(intel_gpgpu_t* gpgpu, uint64_t* > ret_ts) > > /* Get the GPU execute time. */ > static void > -intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, intel_event_t > *event, > - int index, uint64_t* ret_ts) > +intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, int index, > +uint64_t* ret_ts) > { > uint64_t result = 0; > - > - assert(event->ts_buf != NULL); > + assert(gpgpu->time_stamp_b.bo); > assert(index == 0 || index == 1); > - drm_intel_gem_bo_map_gtt(event->ts_buf); > - uint64_t* ptr = event->ts_buf->virtual; > + drm_intel_gem_bo_map_gtt(gpgpu->time_stamp_b.bo); > + uint64_t* ptr = gpgpu->time_stamp_b.bo->virtual; > result = ptr[index]; > > /* According to BSpec, the timestamp counter should be 36 bits, @@ -2311,7 > +2309,7 @@ intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, > intel_event_t *event, > result = (result & 0x0FFFFFFFF) * 80; //convert to nanoseconds > *ret_ts = result; > > - drm_intel_gem_bo_unmap_gtt(event->ts_buf); > + drm_intel_gem_bo_unmap_gtt(gpgpu->time_stamp_b.bo); > } > > static int > -- > 2.7.4 > > > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list 
Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet