Yes indeed, I think Rebecca's patch is not ideal in that respect. Removing the lspci check is right (by the way, I think that GetGenID.sh should be removed too at this point), but regarding whether OpenCL 2.0 is compiled or not, I would rather do something like this:
– if LLVM 3.9 and libdrm 2.4.66 are found, compile with OpenCL 2.0 by default. – if they’re not, compile without it, and issue an error message if -DENABLE_OPENCL_20=1 was explicitly passed. Bruno Le 23/01/2017 à 03:44, Pan, Xiuli a écrit : > I think if we want to enable OpenCL 2.0 as default we must check if there is > a LLVM 3.9 first, or there will have errors to ask people to update llvm. We > may turn the OCL 20 off when no llvm39 is found. > > -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Rebecca N. Palmer > Sent: Sunday, January 22, 2017 9:16 PM > To: Bruno Pagani <bruno.n.pag...@gmail.com>; beignet@lists.freedesktop.org; > ignate...@redhat.com > Subject: [Beignet] [PATCH v2] Enable OpenCL 2.0 only where supported > > This allows a single beignet binary to both offer 2.0 where available, and > still work on older hardware. > (2.0 uses 64 bit pointers, which assert-fail on Ivybridge: > backend/src/backend/gen_context.cpp:2259). > > V2: > Default to 1.2 when -cl-std is not set (required by the OpenCL spec, and also > likely to be faster). Update documentation. > Unconditionally build 2.0-only tests, but skip them on non-2.0 hardware. Add > -cl-std=2.0 to tests that need it. > > Signed-off-by: Rebecca N. 
Palmer <rebecca_pal...@zoho.com> > > --- a/CMakeLists.txt > +++ b/CMakeLists.txt > @@ -231,19 +231,8 @@ IF (EXPERIMENTAL_DOUBLE) > ADD_DEFINITIONS(-DENABLE_FP64) > ENDIF(EXPERIMENTAL_DOUBLE) > > -OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" OFF) > +OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" ON) > IF (ENABLE_OPENCL_20) > - Find_Program(LSPCI lspci) > - IF (NOT LSPCI) > - MESSAGE(FATAL_ERROR "Looking for lspci - not found") > - ENDIF (NOT LSPCI) > - EXECUTE_PROCESS(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/GetGenID.sh" > - RESULT_VARIABLE SUPPORT_OCL20_DEVICE > - OUTPUT_VARIABLE PCI_ID_NOT_USED) > - > - IF (NOT SUPPORT_OCL20_DEVICE EQUAL 1) > - MESSAGE(FATAL_ERROR "Only SKL and newer devices support OpenCL 2.0 now, > your device don't support.") > - ENDIF (NOT SUPPORT_OCL20_DEVICE EQUAL 1) > > IF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN) > MESSAGE(FATAL_ERROR "Please update libdrm to version 2.4.66 or later to > enable OpenCL 2.0.") > --- a/backend/src/backend/program.cpp > +++ b/backend/src/backend/program.cpp > @@ -31,6 +31,7 @@ > #include "ir/value.hpp" > #include "ir/unit.hpp" > #include "ir/printf.hpp" > +#include "../src/cl_device_data.h" > > #ifdef GBE_COMPILER_AVAILABLE > #include "llvm/llvm_to_gen.hpp" > @@ -855,6 +856,7 @@ namespace gbe { > size_t *errSize, > uint32_t &oclVersion) > { > + uint32_t maxoclVersion = oclVersion; > std::string pchFileName; > bool findPCH = false; > #if defined(__ANDROID__) > @@ -1022,15 +1024,9 @@ EXTEND_QUOTE: > } > > if (useDefaultCLCVersion) { > -#ifdef ENABLE_OPENCL_20 > - clOpt.push_back("-D__OPENCL_C_VERSION__=200"); > - clOpt.push_back("-cl-std=CL2.0"); > - oclVersion = 200; > -#else > clOpt.push_back("-D__OPENCL_C_VERSION__=120"); > clOpt.push_back("-cl-std=CL1.2"); > oclVersion = 120; > -#endif > } > //for clCompilerProgram usage. 
> if(temp_header_path){ > @@ -1061,7 +1057,12 @@ EXTEND_QUOTE: > clOpt.push_back("-include-pch"); > clOpt.push_back(pchFileName); > } > - > + if (oclVersion > maxoclVersion){ > + if (err && stringSize > 0 && errSize) { > + *errSize = snprintf(err, stringSize, "Requested OpenCL version %lf > is higher than maximum supported version %lf\n", > (float)oclVersion/100.0,(float)maxoclVersion/100.0); > + } > + return false; > + } > return true; > } > > @@ -1076,7 +1077,7 @@ EXTEND_QUOTE: > std::vector<std::string> clOpt; > std::string dumpLLVMFileName, dumpASMFileName; > std::string dumpSPIRBinaryName; > - uint32_t oclVersion = 0; > + uint32_t oclVersion = MAX_OCLVERSION(deviceID); > if (!processSourceAndOption(source, options, NULL, clOpt, > dumpLLVMFileName, dumpASMFileName, > dumpSPIRBinaryName, > optLevel, @@ -1139,7 +1140,7 @@ EXTEND_QUOTE: > std::vector<std::string> clOpt; > std::string dumpLLVMFileName, dumpASMFileName; > std::string dumpSPIRBinaryName; > - uint32_t oclVersion = 0; > + uint32_t oclVersion = MAX_OCLVERSION(deviceID); > if (!processSourceAndOption(source, options, temp_header_path, clOpt, > dumpLLVMFileName, dumpASMFileName, > dumpSPIRBinaryName, > optLevel, stringSize, err, errSize, > oclVersion)) > --- a/src/cl_device_data.h > +++ b/src/cl_device_data.h > @@ -363,5 +363,7 @@ > > #define IS_GEN9(devid) (IS_SKYLAKE(devid) || IS_BROXTON(devid) || > IS_KABYLAKE(devid)) > > +#define MAX_OCLVERSION(devid) (IS_GEN9(devid) ? 
200 : 120) > + > #endif /* __CL_DEVICE_DATA_H__ */ > > --- a/src/cl_gen9_device.h > +++ b/src/cl_gen9_device.h > @@ -27,5 +27,7 @@ > .max_mem_alloc_size = 4 * 1024 * 1024 * 1024ul, .global_mem_size = 4 * 1024 > * 1024 * 1024ul, > > +#define GEN9_DEVICE 1 > #include "cl_gt_device.h" > +#undef GEN9_DEVICE > > --- a/src/cl_gt_device.h > +++ b/src/cl_gt_device.h > @@ -16,7 +16,13 @@ > * > * Author: Benjamin Segovia <benjamin.sego...@intel.com> > */ > - > +#ifdef GEN9_DEVICE > +#define LIBCL_VERSION_STRING GEN9_LIBCL_VERSION_STRING #define > +LIBCL_C_VERSION_STRING GEN9_LIBCL_C_VERSION_STRING #else #define > +LIBCL_VERSION_STRING NONGEN9_LIBCL_VERSION_STRING #define > +LIBCL_C_VERSION_STRING NONGEN9_LIBCL_C_VERSION_STRING #endif > /* Common fields for both all GT devices (IVB / SNB) */ .device_type = > CL_DEVICE_TYPE_GPU, > .device_id=0,/* == device_id (set when requested) */ @@ -39,7 +45,7 @@ > .native_vector_width_float = 4, .native_vector_width_double = 2, > .native_vector_width_half = 8, -#ifdef ENABLE_OPENCL_20 > +#if defined(ENABLE_OPENCL_20) && defined (GEN9_DEVICE) > .address_bits = 64, > #else > .address_bits = 32, > --- a/src/cl_platform_id.c > +++ b/src/cl_platform_id.c > @@ -32,7 +32,7 @@ > > static struct _cl_platform_id intel_platform_data = { > DECL_INFO_STRING(profile, "FULL_PROFILE") > - DECL_INFO_STRING(version, LIBCL_VERSION_STRING) > + DECL_INFO_STRING(version, GEN9_LIBCL_VERSION_STRING) > DECL_INFO_STRING(name, "Intel Gen OCL Driver") > DECL_INFO_STRING(vendor, "Intel") > DECL_INFO_STRING(icd_suffix_khr, "Intel") > --- a/src/cl_platform_id.h > +++ b/src/cl_platform_id.h > @@ -72,8 +72,10 @@ extern cl_int cl_get_platform_ids(cl_uin #else #define > LIBCL_DRIVER_VERSION_STRING _JOINT(LIBCL_DRIVER_VERSION_MAJOR, > LIBCL_DRIVER_VERSION_MINOR) #endif -#define LIBCL_VERSION_STRING "OpenCL " > _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " > LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING -#define > LIBCL_C_VERSION_STRING "OpenCL C " 
_JOINT(LIBCL_C_VERSION_MAJOR, > LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING > BEIGNET_GIT_SHA1_STRING > +#define GEN9_LIBCL_VERSION_STRING "OpenCL " > +_JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " > +LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING #define > +GEN9_LIBCL_C_VERSION_STRING "OpenCL C " _JOINT(LIBCL_C_VERSION_MAJOR, > +LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING > +BEIGNET_GIT_SHA1_STRING #define NONGEN9_LIBCL_VERSION_STRING "OpenCL > +1.2 beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING > +#define NONGEN9_LIBCL_C_VERSION_STRING "OpenCL C 1.2 beignet " > +LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING > > #endif /* __CL_PLATFORM_ID_H__ */ > > --- a/utests/CMakeLists.txt > +++ b/utests/CMakeLists.txt > @@ -297,7 +297,6 @@ if (LLVM_VERSION_NODOT VERSION_GREATER 34) > compiler_overflow.cpp) > endif (LLVM_VERSION_NODOT VERSION_GREATER 34) > > -if (ENABLE_OPENCL_20) > SET(utests_sources > ${utests_sources} > compiler_program_global.cpp > @@ -308,7 +307,6 @@ if (ENABLE_OPENCL_20) > runtime_pipe_query.cpp > compiler_pipe_builtin.cpp > compiler_device_enqueue.cpp) > -endif (ENABLE_OPENCL_20) > > if (NOT_BUILD_STAND_ALONE_UTEST) > if (X11_FOUND) > --- a/utests/compiler_atomic_functions_20.cpp > +++ b/utests/compiler_atomic_functions_20.cpp > @@ -64,8 +64,9 @@ static void compiler_atomic_functions(co > globals[0] = n; > locals[0] = LOCAL_SIZE; > > + if(!cl_check_ocl20(false)){return;} > // Setup kernel and buffers > - OCL_CREATE_KERNEL_FROM_FILE("compiler_atomic_functions_20", kernel_name); > + OCL_CALL(cl_kernel_init, "compiler_atomic_functions_20.cl", > + kernel_name, SOURCE, "-cl-std=CL2.0"); > OCL_CREATE_BUFFER(buf[0], 0, 24 * sizeof(int), NULL); > OCL_CREATE_BUFFER(buf[1], 0, locals[0] * sizeof(int), NULL); > OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); > --- a/utests/compiler_device_enqueue.cpp > +++ b/utests/compiler_device_enqueue.cpp > @@ -6,8 +6,9 @@ void 
compiler_device_enqueue(void) > const uint32_t global_sz = 3; > uint32_t result = 0; > > + if(!cl_check_ocl20(false)){return;} > // Setup kernel and buffers > - OCL_CREATE_KERNEL("compiler_device_enqueue"); > + OCL_CALL(cl_kernel_init, "compiler_device_enqueue.cl", > + "compiler_device_enqueue", SOURCE, "-cl-std=CL2.0"); > OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); > OCL_SET_ARG(0, sizeof(uint32_t), &global_sz); > OCL_SET_ARG(1, sizeof(cl_mem), &buf[0]); > --- a/utests/compiler_generic_atomic.cpp > +++ b/utests/compiler_generic_atomic.cpp > @@ -6,6 +6,7 @@ void test_atomic(const char* kernelName) > const int n = 16; > T cpu_src[16]; > > + if(!cl_check_ocl20(false)){return;} > // Setup kernel and buffers > OCL_CALL(cl_kernel_init, "compiler_generic_atomic.cl", kernelName, SOURCE, > "-cl-std=CL2.0"); > OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); > --- a/utests/compiler_generic_pointer.cpp > +++ b/utests/compiler_generic_pointer.cpp > @@ -6,6 +6,7 @@ void test(const char* kernelName) > const int n = 16; > T cpu_src[16]; > > + if(!cl_check_ocl20(false)){return;} > // Setup kernel and buffers > OCL_CALL(cl_kernel_init, "compiler_generic_pointer.cl", kernelName, > SOURCE, "-cl-std=CL2.0"); > OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); > --- a/utests/compiler_pipe_builtin.cpp > +++ b/utests/compiler_pipe_builtin.cpp > @@ -11,8 +11,9 @@ static void compiler_pipe_##GROUP##_##TYPE(void) \ > const size_t w = 16; \ > uint32_t ans_host = 0; \ > uint32_t ans_device = 0; \ > + if(!cl_check_ocl20(false)){return;}\ > /* pipe write kernel*/ \ > - OCL_CREATE_KERNEL_FROM_FILE("compiler_pipe_builtin", > "compiler_pipe_"#GROUP"_write_"#TYPE); \ > + OCL_CALL(cl_kernel_init, "compiler_pipe_builtin.cl", > + "compiler_pipe_"#GROUP"_write_"#TYPE, SOURCE, "-cl-std=CL2.0");\ > OCL_CALL2(clCreatePipe, buf[0], ctx, 0, sizeof(TYPE), w, NULL);\ > OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, w * sizeof(TYPE), NULL);\ > OCL_MAP_BUFFER(1);\ > @@ -26,7 +27,7 @@ static void 
compiler_pipe_##GROUP##_##TYPE(void) \ > OCL_NDRANGE(1);\ > OCL_CALL(clReleaseKernel, kernel);\ > /* pipe read kernel */\ > - OCL_CREATE_KERNEL_FROM_FILE("compiler_pipe_builtin", > "compiler_pipe_"#GROUP"_read_"#TYPE);\ > + OCL_CALL(cl_kernel_init, "compiler_pipe_builtin.cl", > + "compiler_pipe_"#GROUP"_read_"#TYPE, SOURCE, "-cl-std=CL2.0");\ > OCL_CREATE_BUFFER(buf[2], CL_MEM_READ_WRITE, w * sizeof(TYPE), NULL);\ > OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);\ > OCL_SET_ARG(1, sizeof(cl_mem), &buf[2]);\ @@ -52,8 +53,9 @@ > PIPE_BUILTIN(mystruct, workgroup) static void compiler_pipe_query(void) { > const size_t w = 32; > const size_t sz = 16; > + if(!cl_check_ocl20(false)){return;} > /* pipe write kernel */ > - OCL_CREATE_KERNEL_FROM_FILE("compiler_pipe_builtin", > "compiler_pipe_query"); > + OCL_CALL(cl_kernel_init, "compiler_pipe_builtin.cl", > + "compiler_pipe_query", SOURCE, "-cl-std=CL2.0"); > OCL_CALL2(clCreatePipe, buf[0], ctx, 0, sizeof(uint32_t), w, NULL); > OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, sz * sizeof(uint32_t), NULL); > OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); > --- a/utests/compiler_program_global.cpp > +++ b/utests/compiler_program_global.cpp > @@ -25,6 +25,7 @@ void compiler_program_global() > int cpu_src[16]; > cl_int err; > > + if(!cl_check_ocl20(false)){return;} > // Setup kernel and buffers > cl_program program; > init_program("compiler_program_global.cl", ctx, &program); > --- a/utests/runtime_pipe_query.cpp > +++ b/utests/runtime_pipe_query.cpp > @@ -4,6 +4,7 @@ static void runtime_pipe_query(void) { > const size_t w = 16; > const size_t sz = 8; > cl_uint retnum, retsz; > + if(!cl_check_ocl20(false)){return;} > /* pipe write kernel */ > OCL_CALL2(clCreatePipe, buf[0], ctx, 0, sz, w, NULL); > OCL_CALL(clGetPipeInfo, buf[0], CL_PIPE_MAX_PACKETS, sizeof(retnum), > &retnum, NULL); > --- a/utests/utest_helper.hpp > +++ b/utests/utest_helper.hpp > @@ -310,8 +310,8 @@ extern clGetKernelSubGroupInfoKHR_cb* ut > /* Check if 
cl_intel_motion_estimation enabled. */ extern int > cl_check_motion_estimation(void); > > -/* Check is cl version 2.0. */ > -extern int cl_check_ocl20(void); > +/* Check is cl version 2.0. (if or_beignet is set, also accept any > +beignet device, whatever its CL version)*/ extern int > +cl_check_ocl20(bool or_beignet=true); > > /* Check is FP16 enabled. */ > extern int cl_check_half(void); > --- a/utests/utest_helper.cpp > +++ b/utests/utest_helper.cpp > @@ -939,13 +939,14 @@ int cl_check_subgroups_short(void) > return 1; > } > > -int cl_check_ocl20(void) > +int cl_check_ocl20(bool or_beignet) > { > size_t param_value_size; > size_t ret_sz; > OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_OPENCL_C_VERSION, 0, 0, > &param_value_size); > if(param_value_size == 0) { > printf("Not OpenCL 2.0 device, "); > + if(or_beignet){ > if(cl_check_beignet()) { > printf("Beignet extension test!"); > return 1; > @@ -953,6 +954,10 @@ int cl_check_ocl20(void) > printf("Not beignet device , Skip!"); > return 0; > } > + }else{ > + printf("Skip!"); > + return 0; > + } > } > char* device_version_str = (char* )malloc(param_value_size * sizeof(char) > ); > OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_OPENCL_C_VERSION, > param_value_size, (void*)device_version_str, &ret_sz); @@ -961,6 +966,7 @@ > int cl_check_ocl20(void) > if(!strstr(device_version_str, "2.0")) { > free(device_version_str); > printf("Not OpenCL 2.0 device, "); > + if(or_beignet){ > if(cl_check_beignet()) { > printf("Beignet extension test!"); > return 1; > @@ -968,6 +974,10 @@ int cl_check_ocl20(void) > printf("Not beignet device , Skip!"); > return 0; > } > + }else{ > + printf("Skip!"); > + return 0; > + } > } > free(device_version_str); > return 1; > --- a/docs/Beignet.mdwn > +++ b/docs/Beignet.mdwn > @@ -154,15 +154,9 @@ Supported Targets > > OpenCL 2.0 > ---------- > -From release v1.3.0, beignet support OpenCL 2.0. By default, OpenCL 2.0 > support is disabled, you can enable it when cmake with option > --DENABLE_OPENCL_20=1. 
Please remember that to enable OpenCL 2.0, there are > some dependencies. First, OpenCL 2.0 only support the targets -from Skylake, > include Skylake, Kabylake and Apollolake. Then, clang supports all OpenCL 2.0 > feature from 3.9. So to enable OpenCL 2.0, -you must update LLVM/clang to 3.9 > or later. And also requeires libdrm at least 2.4.66. > -After enable OpenCL 2.0, beignet complies with OpenCL 2.0 spec, but some > OpenCL 2.0 features are simulated by software, there is no performance -gain, > such as pipe and device queues, especially device queues. > -If you build beignet with OpenCL 2.0 enabled and your kernel don't use the > OpenCL 2.0 features, please pass a build option -cl-std=CL1.2 for > -performance, the OpenCL 2.0 uses more registers and has lots of int64 > operations, which may hurt performance, and beignet will continue to improve > -OpenCL 2.0 performance. > +From release v1.3.0, beignet supports OpenCL 2.0 on Skylake and later > hardware. > +This requires LLVM/Clang 3.9 or later and libdrm 2.4.66 or later; if you do > not have these or do not want 2.0, you can disable it by passing > -DENABLE_OPENCL_20=0 to cmake. > +As required by the OpenCL specification, kernels are compiled as OpenCL C > 1.2 by default; to use 2.0 they must explicitly request it with the > -cl-std=CL2.0 build option. As OpenCL 2.0 is likely to be slower than 1.2, > we recommend that this is used only where needed. (This is because 2.0 uses > more registers and has lots of int64 operations, and some of the 2.0 features > (pipes and especially device queues) are implemented in software so do not > provide any performance gain.) Beignet will continue to improve OpenCL 2.0 > performance. > > Known Issues > ------------ > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/beignet
signature.asc
Description: OpenPGP digital signature
_______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet