Hello, I have recently had to port an application that uses OpenCL on integrated Intel GPUs from Windows to Linux. The issue is, Beignet doesn't call user defined event callbacks on operation completion. For example, if I enqueue a kernel with an event argument, add a callback to that event, then use the event's completion as a trigger to execute work that depends on the kernel's result, the callback, and therefore the dependent code, never gets called, resulting in a deadlock. (The described example works fine on Windows)
After searching for this issue, I found that multiple users have had the same issue with Beignet's event handling system, e.g. https://lists.freedesktop.org/archives/beignet/2015-August/005941.html and https://lists.freedesktop.org/archives/beignet/2015-September/006269.html The root cause of this problem is that Beignet "uses an on-demand manner to maintain the events' status". This results in the events not changing status until clFinish() or another "demanding" function is called. Also, this issue is not limited to callback execution. As seen in numerous reports, it affects clWaitForEvents() and profiling too. It would be great to get a rework of Beignet's event handling system to allow for proper use of events in applications, since currently it is noticeably crippled, or at least some information as to whether or not such a rework is in the works and when it may be available. System information: Processor: Intel Core i7-4790K Integrated graphics: Intel HD Graphics 4600 Ubuntu 14.04.4 Server Linux kernel 4.2.0-36-generic Beignet: Release 1.1.2 (compiled from source) Test code that demonstrates the issue (source provided here and in this GitHub repository: https://github.com/glebov-andrey/beignet_callback_bug_reproducer). NOTE: This example could be easily modified to wait for kernel completion using clFinish(). However the real application has a more complex multi-threaded structure and uses custom synchronization logic, therefore it cannot use clFinish() to wait for the kernel. // The following code uses C++11 features and has been tested with GCC 4.8.4 on Ubuntu 14.04 Server, // MSVC (11.0 and 14.0) and Intel C++ Compiler (15.0 and 16.0) on Windows 7. #include <cstring> #include <string> #include <condition_variable> #include <iostream> #include <stdexcept> #include <memory> #include <thread> #include <chrono> #if defined __INTEL_COMPILER && defined _MSC_VER # pragma warning(push) # pragma warning(disable: 1478) #elif defined(_MSC_VER) # pragma warning(push) # pragma warning(disable: 4996) #endif #include <CL/cl.h> #define HANDLE_CL_ERROR(function_name) \ if (error_code != CL_SUCCESS) { \ throw std::runtime_error(std::string("Error in " #function_name ": ") + std::to_string(error_code)); \ } #ifdef _WIN32 const auto beignet_platform_name = "Intel(R) OpenCL"; #else const auto beignet_platform_name = "Intel Gen OCL Driver"; #endif const auto kernel_source = "kernel void print_hello() { printf(\"Hello from OpenCL\\n\"); }"; std::mutex cond_mutex; std::condition_variable cond_var; bool kernel_complete = false; int main() { cl_int error_code = CL_SUCCESS; try { // find Intel platform cl_uint num_platforms = 0; error_code = clGetPlatformIDs(0, nullptr, &num_platforms); HANDLE_CL_ERROR(clGetPlatformIDs) std::unique_ptr<cl_platform_id[]> platform_ids( new cl_platform_id[static_cast<const std::size_t>(num_platforms)]); error_code = clGetPlatformIDs(num_platforms, platform_ids.get(), nullptr); HANDLE_CL_ERROR(clGetPlatformIDs) cl_platform_id platform = nullptr; for (std::size_t i = 0; i != static_cast<const std::size_t>(num_platforms); ++i) { std::size_t platform_name_size = 0; error_code = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, 0, nullptr, &platform_name_size); HANDLE_CL_ERROR(clGetPlatformInfo) std::unique_ptr<char[]> platform_name(new char[platform_name_size]); error_code = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, platform_name_size, platform_name.get(), nullptr); HANDLE_CL_ERROR(clGetPlatformInfo) if (std::strcmp(beignet_platform_name, platform_name.get()) == 0) { platform = platform_ids[i]; std::cout << "Platform: " << platform_name.get() << std::endl; break; } } if (platform == nullptr) { throw std::runtime_error(std::string("Couldn't find platform with name: ") + beignet_platform_name); } // find Intel GPU cl_device_id device = nullptr; error_code = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr); HANDLE_CL_ERROR(clGetDeviceIDs) std::size_t device_name_size = 0; error_code = clGetDeviceInfo(device, CL_DEVICE_NAME, 0, nullptr, &device_name_size); HANDLE_CL_ERROR(clGetDeviceInfo) std::unique_ptr<char[]> device_name(new char[device_name_size]); error_code = clGetDeviceInfo(device, CL_DEVICE_NAME, device_name_size, device_name.get(), nullptr); HANDLE_CL_ERROR(clGetDeviceInfo) std::cout << "Device: " << device_name.get() << std::endl; // create OpenCL context, command queue, program and kernel const auto context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &error_code); HANDLE_CL_ERROR(clCreateContext) const auto command_queue = clCreateCommandQueue(context, device, 0, &error_code); HANDLE_CL_ERROR(clCreateCommandQueue) const char *source_strings[1]; source_strings[0] = kernel_source; const std::size_t source_size = std::strlen(kernel_source); const auto program = clCreateProgramWithSource(context, 1, source_strings, &source_size, &error_code); HANDLE_CL_ERROR(clCreateProgramWithSource) error_code = clBuildProgram(program, 1, &device, "", nullptr, nullptr); HANDLE_CL_ERROR(clBuildProgram) const auto kernel = clCreateKernel(program, "print_hello", &error_code); HANDLE_CL_ERROR(clCreateKernel) // enqueue kernel and set event completion handler cl_event event; std::size_t global_work_size = 1; error_code = clEnqueueNDRangeKernel(command_queue, kernel, 1, nullptr, &global_work_size, nullptr, 0, nullptr, &event); HANDLE_CL_ERROR(clEnqueueNDRangeKernel) error_code = clSetEventCallback(event, CL_COMPLETE, [](cl_event, cl_int, void *) { std::cout << "OpenCL callback" << std::endl; // Notify the waiting thread that the kernel is completed { std::lock_guard<std::mutex> cond_lock(cond_mutex); kernel_complete = true; } cond_var.notify_one(); }, nullptr); HANDLE_CL_ERROR(clSetEventCallback) error_code = clFlush(command_queue); HANDLE_CL_ERROR(clFlush) // simulate work std::this_thread::sleep_for(std::chrono::seconds(1)); // do work, dependent on kernel completion { std::unique_lock<std::mutex> cond_lock(cond_mutex); while (!kernel_complete) { if (cond_var.wait_for(cond_lock, std::chrono::seconds(5)) == std::cv_status::timeout) { std::cout << "WARNING: A 5 second timeout has been reached on the condition variable.\n" " This may be a deadlock." << std::endl; } } } // When using Beignet, this will never be called as a deadlock will occur. std::cout << "Doing work, dependent on the kernel's completion" << std::endl; } catch (const std::exception &e) { std::cout << "Error: " << e.what() << std::endl; } catch (...) { std::cout << "Unknown error" << std::endl; } } #undef HANDLE_CL_ERROR #if defined __INTEL_COMPILER && defined _MSC_VER # pragma warning(pop) #elif defined(_MSC_VER) # pragma warning(pop) #endif Thanks, Andrey Glebov. _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
