LGTM, pushed, thanks.
On Fri, Nov 29, 2013 at 10:55:54AM +0800, [email protected] wrote: > From: Junyan He <[email protected]> > > The profiling feature is now all supported. We use > drm_intel_reg_read to get the current time of GPU > when the event is queued and submitted, and use > PIPI_CONTROL cmd to get the executing time of the > GPU for kernel start and end. > One trivial problem is that: > The GPU timer counter is 36 bits with resolution of > 80ns, so 2^36*80 = 5500s, about half an hour. > Some test may last about 2~5 min and if it starts at > about half an hour, this may cause a wrap back problem > and cause the case fail. > > Signed-off-by: Junyan He <[email protected]> > --- > src/cl_api.c | 78 > +++++++++++++++++++++++++++++++++++++++---------- > src/cl_driver.h | 8 +++-- > src/cl_driver_defs.c | 3 +- > src/cl_enqueue.c | 19 ++++++++---- > src/cl_enqueue.h | 2 +- > src/cl_event.c | 27 ++++++++++------- > src/cl_event.h | 3 +- > src/intel/intel_gpgpu.c | 36 +++++++++++++++++++++-- > 8 files changed, 137 insertions(+), 39 deletions(-) > > diff --git a/src/cl_api.c b/src/cl_api.c > index 0978129..35d0984 100644 > --- a/src/cl_api.c > +++ b/src/cl_api.c > @@ -70,6 +70,13 @@ handle_events(cl_command_queue queue, cl_int num, const > cl_event *wait_list, > cl_event e; > if(event != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) { > e = cl_event_new(queue->ctx, queue, type, event!=NULL); > + > + /* if need profiling, add the submit timestamp here. */ > + if (e->type != CL_COMMAND_USER && > + e->queue->props & CL_QUEUE_PROFILING_ENABLE) { > + cl_event_get_timestamp(e, CL_PROFILING_COMMAND_QUEUED); > + } > + > if(event != NULL) > *event = e; > if(status == CL_ENQUEUE_EXECUTE_DEFER) { > @@ -1270,16 +1277,25 @@ clGetEventProfilingInfo(cl_event event, > goto error; > } > > - if ((param_name != CL_PROFILING_COMMAND_QUEUED && > - param_name != CL_PROFILING_COMMAND_SUBMIT && > - param_name != CL_PROFILING_COMMAND_START && > - param_name != CL_PROFILING_COMMAND_END) || > - (param_value && param_value_size < sizeof(cl_ulong))) { > + if (param_value && param_value_size < sizeof(cl_ulong)) { > err = CL_INVALID_VALUE; > goto error; > } > > - err = cl_event_profiling(event, param_name, &ret_val); > + if (param_name == CL_PROFILING_COMMAND_QUEUED) { > + ret_val = event->timestamp[0]; > + } else if (param_name == CL_PROFILING_COMMAND_SUBMIT) { > + ret_val = event->timestamp[1]; > + } else if (param_name == CL_PROFILING_COMMAND_START) { > + err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_START); > + ret_val = event->timestamp[2]; > + } else if (param_name == CL_PROFILING_COMMAND_END) { > + err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_END); > + ret_val = event->timestamp[3]; > + } else { > + err = CL_INVALID_VALUE; > + goto error; > + } > > if (err == CL_SUCCESS) { > if (param_value) > @@ -1354,7 +1370,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_READ_BUFFER) == > CL_ENQUEUE_EXECUTE_IMM) { > - err = cl_enqueue_handle(data); > + err = cl_enqueue_handle(event ? *event : NULL, data); > if(event) cl_event_set_status(*event, CL_COMPLETE); > } > > @@ -1437,7 +1453,7 @@ clEnqueueReadBufferRect(cl_command_queue command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_READ_BUFFER_RECT) == > CL_ENQUEUE_EXECUTE_IMM) { > - err = cl_enqueue_handle(data); > + err = cl_enqueue_handle(event ? *event : NULL, data); > if(event) cl_event_set_status(*event, CL_COMPLETE); > } > > @@ -1487,7 +1503,7 @@ clEnqueueWriteBuffer(cl_command_queue command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_WRITE_BUFFER) == > CL_ENQUEUE_EXECUTE_IMM) { > - err = cl_enqueue_handle(data); > + err = cl_enqueue_handle(event ? *event : NULL, data); > if(event) cl_event_set_status(*event, CL_COMPLETE); > } > > @@ -1570,7 +1586,7 @@ clEnqueueWriteBufferRect(cl_command_queue > command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_WRITE_BUFFER_RECT) == > CL_ENQUEUE_EXECUTE_IMM) { > - err = cl_enqueue_handle(data); > + err = cl_enqueue_handle(event ? *event : NULL, data); > if(event) cl_event_set_status(*event, CL_COMPLETE); > } > > @@ -1649,6 +1665,11 @@ clEnqueueCopyBuffer(cl_command_queue command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_COPY_BUFFER) == > CL_ENQUEUE_EXECUTE_IMM) { > + if (event && (*event)->type != CL_COMMAND_USER > + && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { > + cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); > + } > + > err = cl_command_queue_flush(command_queue); > } > return 0; > @@ -1740,6 +1761,11 @@ clEnqueueCopyBufferRect(cl_command_queue > command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_COPY_BUFFER_RECT) == > CL_ENQUEUE_EXECUTE_IMM) { > + if (event && (*event)->type != CL_COMMAND_USER > + && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { > + cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); > + } > + > err = cl_command_queue_flush(command_queue); > } > > @@ -1818,7 +1844,7 @@ clEnqueueReadImage(cl_command_queue command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_READ_IMAGE) == > CL_ENQUEUE_EXECUTE_IMM) { > - err = cl_enqueue_handle(data); > + err = cl_enqueue_handle(event ? *event : NULL, data); > if(event) cl_event_set_status(*event, CL_COMPLETE); > } > > @@ -1897,7 +1923,7 @@ clEnqueueWriteImage(cl_command_queue command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_WRITE_IMAGE) == > CL_ENQUEUE_EXECUTE_IMM) { > - err = cl_enqueue_handle(data); > + err = cl_enqueue_handle(event ? *event : NULL, data); > if(event) cl_event_set_status(*event, CL_COMPLETE); > } > > @@ -1974,6 +2000,11 @@ clEnqueueCopyImage(cl_command_queue command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_COPY_IMAGE) == > CL_ENQUEUE_EXECUTE_IMM) { > + if (event && (*event)->type != CL_COMMAND_USER > + && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { > + cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); > + } > + > err = cl_command_queue_flush(command_queue); > } > > @@ -2030,6 +2061,11 @@ clEnqueueCopyImageToBuffer(cl_command_queue > command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_COPY_IMAGE_TO_BUFFER) == > CL_ENQUEUE_EXECUTE_IMM) { > + if (event && (*event)->type != CL_COMMAND_USER > + && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { > + cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); > + } > + > err = cl_command_queue_flush(command_queue); > } > > @@ -2086,6 +2122,11 @@ clEnqueueCopyBufferToImage(cl_command_queue > command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_COPY_BUFFER_TO_IMAGE) == > CL_ENQUEUE_EXECUTE_IMM) { > + if (event && (*event)->type != CL_COMMAND_USER > + && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { > + cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); > + } > + > err = cl_command_queue_flush(command_queue); > } > > @@ -2217,7 +2258,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_MAP_BUFFER) == > CL_ENQUEUE_EXECUTE_IMM) { > - err = cl_enqueue_handle(data); > + err = cl_enqueue_handle(event ? *event : NULL, data); > if(event) cl_event_set_status(*event, CL_COMPLETE); > } > > @@ -2313,7 +2354,7 @@ clEnqueueMapImage(cl_command_queue command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_MAP_IMAGE) == > CL_ENQUEUE_EXECUTE_IMM) { > - err = cl_enqueue_handle(data); > + err = cl_enqueue_handle(event ? *event : NULL, data); > if(event) cl_event_set_status(*event, CL_COMPLETE); > } > > @@ -2350,7 +2391,7 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_UNMAP_MEM_OBJECT) == > CL_ENQUEUE_EXECUTE_IMM) { > - err = cl_enqueue_handle(data); > + err = cl_enqueue_handle(event ? *event : NULL, data); > if(event) cl_event_set_status(*event, CL_COMPLETE); > } > > @@ -2456,6 +2497,11 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_NDRANGE_KERNEL) == > CL_ENQUEUE_EXECUTE_IMM) { > + if (event && (*event)->type != CL_COMMAND_USER > + && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { > + cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); > + } > + > err = cl_command_queue_flush(command_queue); > } > > @@ -2535,7 +2581,7 @@ clEnqueueNativeKernel(cl_command_queue command_queue, > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > event, data, CL_COMMAND_NATIVE_KERNEL) == > CL_ENQUEUE_EXECUTE_IMM) { > - err = cl_enqueue_handle(data); > + err = cl_enqueue_handle(event ? *event : NULL, data); > if(event) cl_event_set_status(*event, CL_COMPLETE); > } > > diff --git a/src/cl_driver.h b/src/cl_driver.h > index 8efe1e7..a34c22e 100644 > --- a/src/cl_driver.h > +++ b/src/cl_driver.h > @@ -193,8 +193,12 @@ typedef void (cl_gpgpu_event_delete_cb)(cl_gpgpu_event); > extern cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete; > > /* Get a event time stamp */ > -typedef void (cl_gpgpu_event_get_timestamp_cb)(cl_gpgpu_event, int, > uint64_t*); > -extern cl_gpgpu_event_get_timestamp_cb *cl_gpgpu_event_get_timestamp; > +typedef void (cl_gpgpu_event_get_exec_timestamp_cb)(cl_gpgpu_event, int, > uint64_t*); > +extern cl_gpgpu_event_get_exec_timestamp_cb > *cl_gpgpu_event_get_exec_timestamp; > + > +/* Get current GPU time stamp */ > +typedef void (cl_gpgpu_event_get_gpu_cur_timestamp_cb)(cl_gpgpu, uint64_t*); > +extern cl_gpgpu_event_get_gpu_cur_timestamp_cb > *cl_gpgpu_event_get_gpu_cur_timestamp; > > /* Will spawn all threads */ > typedef void (cl_gpgpu_walker_cb)(cl_gpgpu, > diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c > index 54fa62e..b46799a 100644 > --- a/src/cl_driver_defs.c > +++ b/src/cl_driver_defs.c > @@ -80,5 +80,6 @@ LOCAL cl_gpgpu_event_update_status_cb > *cl_gpgpu_event_update_status = NULL; > LOCAL cl_gpgpu_event_pending_cb *cl_gpgpu_event_pending = NULL; > LOCAL cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume = NULL; > LOCAL cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete = NULL; > -LOCAL cl_gpgpu_event_get_timestamp_cb *cl_gpgpu_event_get_timestamp = NULL; > +LOCAL cl_gpgpu_event_get_exec_timestamp_cb > *cl_gpgpu_event_get_exec_timestamp = NULL; > +LOCAL cl_gpgpu_event_get_gpu_cur_timestamp_cb > *cl_gpgpu_event_get_gpu_cur_timestamp = NULL; > > diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c > index 070fd98..330d230 100644 > --- a/src/cl_enqueue.c > +++ b/src/cl_enqueue.c > @@ -16,16 +16,18 @@ > * > * Author: Rong Yang <[email protected]> > */ > +#include <stdio.h> > +#include <string.h> > +#include <assert.h> > +#include <pthread.h> > > #include "cl_enqueue.h" > #include "cl_image.h" > #include "cl_driver.h" > +#include "cl_event.h" > +#include "cl_command_queue.h" > #include "cl_utils.h" > > -#include <stdio.h> > -#include <string.h> > -#include <assert.h> > -#include <pthread.h> > > cl_int cl_enqueue_read_buffer(enqueue_data* data) > { > @@ -376,8 +378,15 @@ cl_int cl_enqueue_native_kernel(enqueue_data *data) > error: > return err; > } > -cl_int cl_enqueue_handle(enqueue_data* data) > + > +cl_int cl_enqueue_handle(cl_event event, enqueue_data* data) > { > + /* if need profiling, add the submit timestamp here. */ > + if (event && event->type != CL_COMMAND_USER > + && event->queue->props & CL_QUEUE_PROFILING_ENABLE) { > + cl_event_get_timestamp(event, CL_PROFILING_COMMAND_SUBMIT); > + } > + > switch(data->type) { > case EnqueueReadBuffer: > return cl_enqueue_read_buffer(data); > diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h > index b412d58..1d3ae5f 100644 > --- a/src/cl_enqueue.h > +++ b/src/cl_enqueue.h > @@ -64,5 +64,5 @@ typedef struct _enqueue_data { > } enqueue_data; > > /* Do real enqueue commands */ > -cl_int cl_enqueue_handle(enqueue_data* data); > +cl_int cl_enqueue_handle(cl_event event, enqueue_data* data); > #endif /* __CL_ENQUEUE_H__ */ > diff --git a/src/cl_event.c b/src/cl_event.c > index 028dfb6..f838a3a 100644 > --- a/src/cl_event.c > +++ b/src/cl_event.c > @@ -380,7 +380,7 @@ void cl_event_set_status(cl_event event, cl_int status) > > if(status <= CL_COMPLETE) { > if(event->enqueue_cb) { > - cl_enqueue_handle(&event->enqueue_cb->data); > + cl_enqueue_handle(event, &event->enqueue_cb->data); > if(event->gpgpu_event) > cl_gpgpu_event_update_status(event->gpgpu_event, 1); //now set > complet, need refine > event->status = status; //Change the event status after enqueue and > befor unlock > @@ -496,22 +496,29 @@ cl_int cl_event_marker(cl_command_queue queue, > cl_event* event) > return CL_SUCCESS; > } > > -cl_int cl_event_profiling(cl_event event, cl_profiling_info param_name, > cl_ulong *ret_val) > +cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name) > { > + cl_ulong ret_val = 0; > + GET_QUEUE_THREAD_GPGPU(event->queue); > + > if (!event->gpgpu_event) { > - /* Some event like read buffer do not need GPU involved, so > - we just return all the profiling to 0 now. */ > - *ret_val = 0; > + cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val); > + event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val; > return CL_SUCCESS; > } > > - if(param_name == CL_PROFILING_COMMAND_START || > - param_name == CL_PROFILING_COMMAND_QUEUED || > - param_name == CL_PROFILING_COMMAND_SUBMIT) { > - cl_gpgpu_event_get_timestamp(event->gpgpu_event, 0, ret_val); > + if(param_name == CL_PROFILING_COMMAND_SUBMIT || > + param_name == CL_PROFILING_COMMAND_QUEUED) { > + cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val); > + event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val; > + return CL_SUCCESS; > + } else if(param_name == CL_PROFILING_COMMAND_START) { > + cl_gpgpu_event_get_exec_timestamp(event->gpgpu_event, 0, &ret_val); > + event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val; > return CL_SUCCESS; > } else if (param_name == CL_PROFILING_COMMAND_END) { > - cl_gpgpu_event_get_timestamp(event->gpgpu_event, 1, ret_val); > + cl_gpgpu_event_get_exec_timestamp(event->gpgpu_event, 1, &ret_val); > + event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val; > return CL_SUCCESS; > } else { > return CL_INVALID_VALUE; > diff --git a/src/cl_event.h b/src/cl_event.h > index 722486a..3c61110 100644 > --- a/src/cl_event.h > +++ b/src/cl_event.h > @@ -68,6 +68,7 @@ struct _cl_event { > enqueue_callback* enqueue_cb; /* This event's enqueue */ > enqueue_callback* waits_head; /* The head of enqueues list wait on this > event */ > cl_bool emplict; /* Identify this event whether created by > api emplict*/ > + cl_ulong timestamp[4];/* The time stamps for profiling. */ > }; > > /* Create a new event object */ > @@ -91,6 +92,6 @@ void cl_event_update_status(cl_event); > /* Create the marker event */ > cl_int cl_event_marker(cl_command_queue, cl_event*); > /* Do the event profiling */ > -cl_int cl_event_profiling(cl_event event, cl_profiling_info param_name, > cl_ulong *ret_val); > +cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name); > #endif /* __CL_EVENT_H__ */ > > diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c > index 7be9059..b1597ac 100644 > --- a/src/intel/intel_gpgpu.c > +++ b/src/intel/intel_gpgpu.c > @@ -51,6 +51,8 @@ > #define MO_RETAIN_BIT (1 << 28) > #define SAMPLER_STATE_SIZE (16) > > +#define TIMESTAMP_ADDR 0x2358 > + > /* Stores both binding tables and surface states */ > typedef struct surface_heap { > uint32_t binding_table[256]; > @@ -1041,15 +1043,42 @@ intel_gpgpu_event_delete(intel_event_t *event) > cl_free(event); > } > > +/* We want to get the current time of GPU. */ > +static void > +intel_gpgpu_event_get_gpu_cur_timestamp(intel_gpgpu_t* gpgpu, uint64_t* > ret_ts) > +{ > + uint64_t result = 0; > + drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr; > + > + drm_intel_reg_read(bufmgr, TIMESTAMP_ADDR, &result); > + result = result & 0xFFFFFFFFF0000000; > + result = result >> 28; > + result *= 80; > + > + *ret_ts = result; > + return; > +} > + > +/* Get the GPU execute time. */ > static void > -intel_gpgpu_event_get_timestamp(intel_event_t *event, int index, uint64_t* > ret_ts) > +intel_gpgpu_event_get_exec_timestamp(intel_event_t *event, > + int index, uint64_t* ret_ts) > { > + uint64_t result = 0; > + > assert(event->ts_buf != NULL); > assert(index == 0 || index == 1); > drm_intel_gem_bo_map_gtt(event->ts_buf); > uint64_t* ptr = event->ts_buf->virtual; > + result = ptr[index]; > + > + /* According to BSpec, the timestamp counter should be 36 bits, > + but comparing to the timestamp counter from IO control reading, > + we find the first 4 bits seems to be fake. In order to keep the > + timestamp counter conformable, we just skip the first 4 bits. */ > + result = ((result & 0x0FFFFFFFF) << 4) * 80; //convert to nanoseconds > + *ret_ts = result; > > - *ret_ts = ptr[index] * 80; //convert to nanoseconds > drm_intel_gem_bo_unmap_gtt(event->ts_buf); > } > > @@ -1080,6 +1109,7 @@ intel_set_gpgpu_callbacks(void) > cl_gpgpu_event_pending = (cl_gpgpu_event_pending_cb > *)intel_gpgpu_event_pending; > cl_gpgpu_event_resume = (cl_gpgpu_event_resume_cb > *)intel_gpgpu_event_resume; > cl_gpgpu_event_delete = (cl_gpgpu_event_delete_cb > *)intel_gpgpu_event_delete; > - cl_gpgpu_event_get_timestamp = (cl_gpgpu_event_get_timestamp_cb > *)intel_gpgpu_event_get_timestamp; > + cl_gpgpu_event_get_exec_timestamp = (cl_gpgpu_event_get_exec_timestamp_cb > *)intel_gpgpu_event_get_exec_timestamp; > + cl_gpgpu_event_get_gpu_cur_timestamp = > (cl_gpgpu_event_get_gpu_cur_timestamp_cb > *)intel_gpgpu_event_get_gpu_cur_timestamp; > } > > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
