Re: [Beignet] [PATCH] Make in-order command queues actually be in-order
A demonstration that "in-order" queues currently aren't: //g++ -o queue_order_test queue_order_test.c -lOpenCL //Depends: beignet-opencl-icd ocl-icd-opencl-dev #include #include int main() { cl_int status; cl_device_id device; clGetDeviceIDs(NULL,CL_DEVICE_TYPE_ALL,1,,NULL); char device_name[101]; device_name[100]=0; clGetDeviceInfo(device,CL_DEVICE_NAME,100,device_name,NULL); printf("Using device %s",device_name); cl_context ctx; cl_command_queue queue; cl_program program1,program2; cl_kernel kernel1,kernel2; cl_mem buffer; cl_event uevent1,uevent2,kernels_finished[2]; size_t n = 3; cl_int test_data[3] = {3, 7, 5}; const char* kernel1_source = "__kernel void test1(__global int *buf) {" "printf(\"kern1 \");" " buf[get_global_id(0)] = 2* buf[get_global_id(0)];" "}"; const char* kernel2_source = "__kernel void test2(__global int *buf) {" "printf(\"kern2 \");" " buf[get_global_id(0)] = 9+ buf[get_global_id(0)];" "}"; //Expected result: 15 23 19 if 1 runs first (in-order queue), 24 32 28 if 2 runs first (out-of-order queue) ctx = clCreateContext(NULL, 1, , NULL, NULL, ); if(!ctx) return 1; //cl_queue_properties qsettings[3]={CL_QUEUE_PROPERTIES,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,0}; cl_queue_properties qsettings[3]={CL_QUEUE_PROPERTIES,0,0}; queue = clCreateCommandQueueWithProperties(ctx, device, qsettings, ); //queue = clCreateCommandQueueWithProperties(ctx, device, 0, ); cl_command_queue_properties qp; clGetCommandQueueInfo(queue,CL_QUEUE_PROPERTIES,sizeof(qp),,NULL); printf(" queue properties %i\n",qp); program1 = clCreateProgramWithSource(ctx, 1, _source, NULL, ); clBuildProgram(program1, 1, , "", NULL, NULL); kernel1 = clCreateKernel(program1, "test1", ); program2 = clCreateProgramWithSource(ctx, 1, _source, NULL, ); clBuildProgram(program2, 1, , "", NULL, NULL); kernel2 = clCreateKernel(program2, "test2", ); buffer = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n*4, test_data, ); uevent1=clCreateUserEvent(ctx,); uevent2=clCreateUserEvent(ctx,); 
clSetKernelArg(kernel1, 0, sizeof(cl_mem), ); clSetKernelArg(kernel2, 0, sizeof(cl_mem), ); clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, , , 1,, _finished[0]); clEnqueueNDRangeKernel(queue, kernel2, 1, NULL, , , 0,NULL, _finished[1]);//without uevent2, bypasses queue //clEnqueueNDRangeKernel(queue, kernel2, 1, NULL, , , 1,, _finished[1]); clSetUserEventStatus(uevent2,CL_COMPLETE); printf("\nsetting event %p (others %p %p) - enter a number\n",uevent1,kernels_finished[0],kernels_finished[1]); int j;scanf("%i",); clSetUserEventStatus(uevent1,CL_COMPLETE); clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, n*4, test_data, 2, kernels_finished, NULL); printf("\nresult: %i %i %i\n",test_data[0],test_data[1],test_data[2]); } ___ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] Make in-order command queues actually be in-order
When beignet added out-of-order execution support (7fd45f15), it made *all* command queues out-of-order, even if they were created as (and are reported by clGetCommandQueueInfo as) in-order. Signed-off-by: Rebecca N. Palmer --- Not sure whether this one is actually worth it: it's clearly against the spec, but I'm not aware of it causing any real-world bugs. (I noticed it while investigating an issue that turned out to be unrelated.) Users who expect a queue to be in-order are probably not using events, and that makes a beignet queue effectively in-order. (This is *not* true of out-of-order queues in some other ICDs, e.g. pocl: it is true in Beignet because our flush (in particular the implicit one before a blocking copy) is also an ordering barrier, but the spec doesn't require that. If you choose not to take this, it might be a good idea to add a comment to cl_command_queue_wait_flush documenting that.) --- a/src/cl_api.c +++ b/src/cl_api.c @@ -283,7 +283,7 @@ clEnqueueSVMFree (cl_command_queue comma data->size = num_svm_pointers; data->ptr = user_data; -if (e_status == CL_COMPLETE) { +if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -429,7 +429,7 @@ cl_int clEnqueueSVMMemcpy (cl_command_qu data->const_ptr= src_ptr; data->size = size; -if (e_status == CL_COMPLETE) { +if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. 
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -441,6 +441,9 @@ cl_int clEnqueueSVMMemcpy (cl_command_qu break; } cl_command_queue_enqueue_event(command_queue, e); + if (blocking_copy) { +cl_event_wait_for_events_list(1, &e); + } } } while(0); @@ -518,7 +521,7 @@ cl_int clEnqueueSVMMemFill (cl_command_q data->pattern_size = pattern_size; data->size = size; -if (e_status == CL_COMPLETE) { +if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { --- a/src/cl_api_kernel.c +++ b/src/cl_api_kernel.c @@ -223,6 +223,7 @@ clEnqueueNDRangeKernel(cl_command_queue count *= global_wk_sz_rem[2] ? 2 : 1; const size_t *global_wk_all[2] = {global_wk_sz_div, global_wk_sz_rem}; +cl_bool allow_immediate_submit = cl_command_queue_allow_bypass_submit(command_queue); /* Go through the at most 8 cases and euque if there is work items left */ for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { @@ -263,7 +264,7 @@ clEnqueueNDRangeKernel(cl_command_queue break; } - err = cl_event_exec(e, (event_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED), CL_FALSE); + err = cl_event_exec(e, ((allow_immediate_submit && event_status == CL_COMPLETE) ? CL_SUBMITTED : CL_QUEUED), CL_FALSE); if (err != CL_SUCCESS) { break; } --- a/src/cl_api_mem.c +++ b/src/cl_api_mem.c @@ -309,7 +309,7 @@ clEnqueueMapBuffer(cl_command_queue comm if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) data->write_map = 1; -if (e_status == CL_COMPLETE) { +if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. 
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -322,6 +322,9 @@ clEnqueueMapBuffer(cl_command_queue comm } cl_command_queue_enqueue_event(command_queue, e); + if (blocking_map) { +cl_event_wait_for_events_list(1, &e); + } } ptr = data->ptr; @@ -469,7 +472,7 @@ clEnqueueUnmapMemObject(cl_command_queue data->mem_obj = memobj; data->ptr = mapped_ptr; -if (e_status == CL_COMPLETE) { // No need to wait +if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // No need to wait err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { break; @@ -571,7 +574,7 @@ clEnqueueReadBuffer(cl_command_queue com data->offset = offset; data->size = size; -if (e_status == CL_COMPLETE) { +if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -583,6 +586,9 @@ clEnqueueReadBuffer(cl_command_queue com break; } cl_command_queue_enqueue_event(command_queue, e); + if (blocking_read) { +cl_event_wait_for_events_list(1, &e); + } } } while (0); @@