Yes indeed, I think Rebecca's patch is not ideal in that respect.

Removing the lspci check is right (by the way, I think that GetGenID.sh
should be removed too at this point), but regarding whether OpenCL 2.0
is compiled or not, I would rather do something like this:

– if LLVM 3.9 (or later) and libdrm 2.4.66 (or later) are found, compile
with OpenCL 2.0 by default.
– if they are not found, compile without it, and issue an error message if
-DENABLE_OPENCL_20=1 was passed.

Bruno

Le 23/01/2017 à 03:44, Pan, Xiuli a écrit :
> I think if we want to enable OpenCL 2.0 as default we must check if there is 
> a LLVM 3.9 first, or there will have errors to ask people to update llvm. We 
> may turn the OCL 20 off when no llvm39 is found.
>
> -----Original Message-----
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of 
> Rebecca N. Palmer
> Sent: Sunday, January 22, 2017 9:16 PM
> To: Bruno Pagani <bruno.n.pag...@gmail.com>; beignet@lists.freedesktop.org; 
> ignate...@redhat.com
> Subject: [Beignet] [PATCH v2] Enable OpenCL 2.0 only where supported
>
> This allows a single beignet binary to both offer 2.0 where available, and 
> still work on older hardware.
> (2.0 uses 64 bit pointers, which assert-fail on Ivybridge:
> backend/src/backend/gen_context.cpp:2259).
>
> V2: 
> Default to 1.2 when -cl-std is not set (required by the OpenCL spec, and also 
> likely to be faster).  Update documentation.
> Unconditionally build 2.0-only tests, but skip them on non-2.0 hardware.  Add 
> -cl-std=2.0 to tests that need it.
>
> Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>
>
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -231,19 +231,8 @@ IF (EXPERIMENTAL_DOUBLE)
>    ADD_DEFINITIONS(-DENABLE_FP64)
>  ENDIF(EXPERIMENTAL_DOUBLE)
>  
> -OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" OFF)
> +OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" ON)
>  IF (ENABLE_OPENCL_20)
> -  Find_Program(LSPCI lspci)
> -  IF (NOT LSPCI)
> -    MESSAGE(FATAL_ERROR "Looking for lspci - not found")
> -  ENDIF (NOT LSPCI)
> -  EXECUTE_PROCESS(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/GetGenID.sh"
> -                          RESULT_VARIABLE SUPPORT_OCL20_DEVICE
> -                          OUTPUT_VARIABLE PCI_ID_NOT_USED)
> -
> -  IF (NOT SUPPORT_OCL20_DEVICE EQUAL 1)
> -    MESSAGE(FATAL_ERROR "Only SKL and newer devices support OpenCL 2.0 now, 
> your device don't support.")
> -  ENDIF (NOT SUPPORT_OCL20_DEVICE EQUAL 1)
>  
>    IF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
>      MESSAGE(FATAL_ERROR "Please update libdrm to version 2.4.66 or later to 
> enable OpenCL 2.0.")
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -31,6 +31,7 @@
>  #include "ir/value.hpp"
>  #include "ir/unit.hpp"
>  #include "ir/printf.hpp"
> +#include "../src/cl_device_data.h"
>  
>  #ifdef GBE_COMPILER_AVAILABLE
>  #include "llvm/llvm_to_gen.hpp"
> @@ -855,6 +856,7 @@ namespace gbe {
>                                       size_t *errSize,
>                                       uint32_t &oclVersion)
>    {
> +    uint32_t maxoclVersion = oclVersion;
>      std::string pchFileName;
>      bool findPCH = false;
>  #if defined(__ANDROID__)
> @@ -1022,15 +1024,9 @@ EXTEND_QUOTE:
>      }
>  
>      if (useDefaultCLCVersion) {
> -#ifdef ENABLE_OPENCL_20
> -      clOpt.push_back("-D__OPENCL_C_VERSION__=200");
> -      clOpt.push_back("-cl-std=CL2.0");
> -      oclVersion = 200;
> -#else
>        clOpt.push_back("-D__OPENCL_C_VERSION__=120");
>        clOpt.push_back("-cl-std=CL1.2");
>        oclVersion = 120;
> -#endif
>      }
>      //for clCompilerProgram usage.
>      if(temp_header_path){
> @@ -1061,7 +1057,12 @@ EXTEND_QUOTE:
>        clOpt.push_back("-include-pch");
>        clOpt.push_back(pchFileName);
>      }
> -
> +    if (oclVersion > maxoclVersion){
> +      if (err && stringSize > 0 && errSize) {
> +         *errSize = snprintf(err, stringSize, "Requested OpenCL version %lf 
> is higher than maximum supported version %lf\n", 
> (float)oclVersion/100.0,(float)maxoclVersion/100.0);
> +      }
> +      return false;
> +    }
>      return true;
>    }
>  
> @@ -1076,7 +1077,7 @@ EXTEND_QUOTE:
>      std::vector<std::string> clOpt;
>      std::string dumpLLVMFileName, dumpASMFileName;
>      std::string dumpSPIRBinaryName;
> -    uint32_t oclVersion = 0;
> +    uint32_t oclVersion = MAX_OCLVERSION(deviceID);
>      if (!processSourceAndOption(source, options, NULL, clOpt,
>                                  dumpLLVMFileName, dumpASMFileName, 
> dumpSPIRBinaryName,
>                                  optLevel, @@ -1139,7 +1140,7 @@ EXTEND_QUOTE:
>      std::vector<std::string> clOpt;
>      std::string dumpLLVMFileName, dumpASMFileName;
>      std::string dumpSPIRBinaryName;
> -    uint32_t oclVersion = 0;
> +    uint32_t oclVersion = MAX_OCLVERSION(deviceID);
>      if (!processSourceAndOption(source, options, temp_header_path, clOpt,
>                                  dumpLLVMFileName, dumpASMFileName, 
> dumpSPIRBinaryName,
>                                  optLevel, stringSize, err, errSize, 
> oclVersion))
> --- a/src/cl_device_data.h
> +++ b/src/cl_device_data.h
> @@ -363,5 +363,7 @@
>  
>  #define IS_GEN9(devid)     (IS_SKYLAKE(devid) || IS_BROXTON(devid) || 
> IS_KABYLAKE(devid))
>  
> +#define MAX_OCLVERSION(devid) (IS_GEN9(devid) ? 200 : 120)
> +
>  #endif /* __CL_DEVICE_DATA_H__ */
>  
> --- a/src/cl_gen9_device.h
> +++ b/src/cl_gen9_device.h
> @@ -27,5 +27,7 @@
>  .max_mem_alloc_size = 4 * 1024 * 1024 * 1024ul,  .global_mem_size = 4 * 1024 
> * 1024 * 1024ul,
>  
> +#define GEN9_DEVICE 1
>  #include "cl_gt_device.h"
> +#undef GEN9_DEVICE
>  
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -16,7 +16,13 @@
>   *
>   * Author: Benjamin Segovia <benjamin.sego...@intel.com>
>   */
> -
> +#ifdef GEN9_DEVICE
> +#define LIBCL_VERSION_STRING GEN9_LIBCL_VERSION_STRING #define 
> +LIBCL_C_VERSION_STRING GEN9_LIBCL_C_VERSION_STRING #else #define 
> +LIBCL_VERSION_STRING NONGEN9_LIBCL_VERSION_STRING #define 
> +LIBCL_C_VERSION_STRING NONGEN9_LIBCL_C_VERSION_STRING #endif
>  /* Common fields for both all GT devices (IVB / SNB) */  .device_type = 
> CL_DEVICE_TYPE_GPU,
>  .device_id=0,/* == device_id (set when requested) */ @@ -39,7 +45,7 @@  
> .native_vector_width_float = 4,  .native_vector_width_double = 2,  
> .native_vector_width_half = 8, -#ifdef ENABLE_OPENCL_20
> +#if defined(ENABLE_OPENCL_20) && defined (GEN9_DEVICE)
>  .address_bits = 64,
>  #else
>  .address_bits = 32,
> --- a/src/cl_platform_id.c
> +++ b/src/cl_platform_id.c
> @@ -32,7 +32,7 @@
>  
>  static struct _cl_platform_id intel_platform_data = {
>    DECL_INFO_STRING(profile, "FULL_PROFILE")
> -  DECL_INFO_STRING(version, LIBCL_VERSION_STRING)
> +  DECL_INFO_STRING(version, GEN9_LIBCL_VERSION_STRING)
>    DECL_INFO_STRING(name, "Intel Gen OCL Driver")
>    DECL_INFO_STRING(vendor, "Intel")
>    DECL_INFO_STRING(icd_suffix_khr, "Intel")
> --- a/src/cl_platform_id.h
> +++ b/src/cl_platform_id.h
> @@ -72,8 +72,10 @@ extern cl_int cl_get_platform_ids(cl_uin  #else  #define 
> LIBCL_DRIVER_VERSION_STRING _JOINT(LIBCL_DRIVER_VERSION_MAJOR, 
> LIBCL_DRIVER_VERSION_MINOR)  #endif -#define LIBCL_VERSION_STRING "OpenCL " 
> _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " 
> LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING -#define 
> LIBCL_C_VERSION_STRING "OpenCL C " _JOINT(LIBCL_C_VERSION_MAJOR, 
> LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING 
> BEIGNET_GIT_SHA1_STRING
> +#define GEN9_LIBCL_VERSION_STRING "OpenCL " 
> +_JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " 
> +LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING #define 
> +GEN9_LIBCL_C_VERSION_STRING "OpenCL C " _JOINT(LIBCL_C_VERSION_MAJOR, 
> +LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING 
> +BEIGNET_GIT_SHA1_STRING #define NONGEN9_LIBCL_VERSION_STRING "OpenCL 
> +1.2 beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING 
> +#define NONGEN9_LIBCL_C_VERSION_STRING "OpenCL C 1.2 beignet " 
> +LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
>  
>  #endif /* __CL_PLATFORM_ID_H__ */
>  
> --- a/utests/CMakeLists.txt
> +++ b/utests/CMakeLists.txt
> @@ -297,7 +297,6 @@ if (LLVM_VERSION_NODOT VERSION_GREATER 34)
>        compiler_overflow.cpp)
>  endif (LLVM_VERSION_NODOT VERSION_GREATER 34)
>  
> -if (ENABLE_OPENCL_20)
>    SET(utests_sources
>        ${utests_sources}
>        compiler_program_global.cpp
> @@ -308,7 +307,6 @@ if (ENABLE_OPENCL_20)
>        runtime_pipe_query.cpp
>        compiler_pipe_builtin.cpp
>        compiler_device_enqueue.cpp)
> -endif (ENABLE_OPENCL_20)
>  
>  if (NOT_BUILD_STAND_ALONE_UTEST)
>    if (X11_FOUND)
> --- a/utests/compiler_atomic_functions_20.cpp
> +++ b/utests/compiler_atomic_functions_20.cpp
> @@ -64,8 +64,9 @@ static void compiler_atomic_functions(co
>    globals[0] = n;
>    locals[0] = LOCAL_SIZE;
>  
> +  if(!cl_check_ocl20(false)){return;}
>    // Setup kernel and buffers
> -  OCL_CREATE_KERNEL_FROM_FILE("compiler_atomic_functions_20", kernel_name);
> +  OCL_CALL(cl_kernel_init, "compiler_atomic_functions_20.cl", 
> + kernel_name, SOURCE, "-cl-std=CL2.0");
>    OCL_CREATE_BUFFER(buf[0], 0, 24 * sizeof(int), NULL);
>    OCL_CREATE_BUFFER(buf[1], 0, locals[0] * sizeof(int), NULL);
>    OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> --- a/utests/compiler_device_enqueue.cpp
> +++ b/utests/compiler_device_enqueue.cpp
> @@ -6,8 +6,9 @@ void compiler_device_enqueue(void)
>    const uint32_t global_sz = 3;
>    uint32_t result = 0;
>  
> +  if(!cl_check_ocl20(false)){return;}
>    // Setup kernel and buffers
> -  OCL_CREATE_KERNEL("compiler_device_enqueue");
> +  OCL_CALL(cl_kernel_init, "compiler_device_enqueue.cl", 
> + "compiler_device_enqueue", SOURCE, "-cl-std=CL2.0");
>    OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
>    OCL_SET_ARG(0, sizeof(uint32_t), &global_sz);
>    OCL_SET_ARG(1, sizeof(cl_mem), &buf[0]);
> --- a/utests/compiler_generic_atomic.cpp
> +++ b/utests/compiler_generic_atomic.cpp
> @@ -6,6 +6,7 @@ void test_atomic(const char* kernelName)
>    const int n = 16;
>    T cpu_src[16];
>  
> +  if(!cl_check_ocl20(false)){return;}
>    // Setup kernel and buffers
>    OCL_CALL(cl_kernel_init, "compiler_generic_atomic.cl", kernelName, SOURCE, 
> "-cl-std=CL2.0");
>    OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL);
> --- a/utests/compiler_generic_pointer.cpp
> +++ b/utests/compiler_generic_pointer.cpp
> @@ -6,6 +6,7 @@ void test(const char* kernelName)
>    const int n = 16;
>    T cpu_src[16];
>  
> +  if(!cl_check_ocl20(false)){return;}
>    // Setup kernel and buffers
>    OCL_CALL(cl_kernel_init, "compiler_generic_pointer.cl", kernelName, 
> SOURCE, "-cl-std=CL2.0");
>    OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL);
> --- a/utests/compiler_pipe_builtin.cpp
> +++ b/utests/compiler_pipe_builtin.cpp
> @@ -11,8 +11,9 @@ static void compiler_pipe_##GROUP##_##TYPE(void) \
>    const size_t w = 16;  \
>    uint32_t ans_host = 0;  \
>    uint32_t ans_device = 0;  \
> +  if(!cl_check_ocl20(false)){return;}\
>    /* pipe write kernel*/  \
> -  OCL_CREATE_KERNEL_FROM_FILE("compiler_pipe_builtin", 
> "compiler_pipe_"#GROUP"_write_"#TYPE);  \
> +  OCL_CALL(cl_kernel_init, "compiler_pipe_builtin.cl", 
> + "compiler_pipe_"#GROUP"_write_"#TYPE, SOURCE, "-cl-std=CL2.0");\
>    OCL_CALL2(clCreatePipe, buf[0], ctx, 0, sizeof(TYPE), w, NULL);\
>    OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, w * sizeof(TYPE), NULL);\
>    OCL_MAP_BUFFER(1);\
> @@ -26,7 +27,7 @@ static void compiler_pipe_##GROUP##_##TYPE(void) \
>    OCL_NDRANGE(1);\
>    OCL_CALL(clReleaseKernel, kernel);\
>    /* pipe read kernel */\
> -  OCL_CREATE_KERNEL_FROM_FILE("compiler_pipe_builtin", 
> "compiler_pipe_"#GROUP"_read_"#TYPE);\
> +  OCL_CALL(cl_kernel_init, "compiler_pipe_builtin.cl", 
> + "compiler_pipe_"#GROUP"_read_"#TYPE, SOURCE, "-cl-std=CL2.0");\
>    OCL_CREATE_BUFFER(buf[2], CL_MEM_READ_WRITE, w * sizeof(TYPE), NULL);\
>    OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);\
>    OCL_SET_ARG(1, sizeof(cl_mem), &buf[2]);\ @@ -52,8 +53,9 @@ 
> PIPE_BUILTIN(mystruct, workgroup)  static void compiler_pipe_query(void) {
>    const size_t w = 32;
>    const size_t sz = 16;
> +  if(!cl_check_ocl20(false)){return;}
>    /* pipe write kernel */
> -  OCL_CREATE_KERNEL_FROM_FILE("compiler_pipe_builtin", 
> "compiler_pipe_query");
> +  OCL_CALL(cl_kernel_init, "compiler_pipe_builtin.cl", 
> + "compiler_pipe_query", SOURCE, "-cl-std=CL2.0");
>    OCL_CALL2(clCreatePipe, buf[0], ctx, 0, sizeof(uint32_t), w, NULL);
>    OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, sz * sizeof(uint32_t), NULL);
>    OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> --- a/utests/compiler_program_global.cpp
> +++ b/utests/compiler_program_global.cpp
> @@ -25,6 +25,7 @@ void compiler_program_global()
>    int cpu_src[16];
>    cl_int err;
>  
> +  if(!cl_check_ocl20(false)){return;}
>    // Setup kernel and buffers
>    cl_program program;
>    init_program("compiler_program_global.cl", ctx, &program);
> --- a/utests/runtime_pipe_query.cpp
> +++ b/utests/runtime_pipe_query.cpp
> @@ -4,6 +4,7 @@ static void runtime_pipe_query(void) {
>    const size_t w = 16;
>    const size_t sz = 8;
>    cl_uint retnum, retsz;
> +  if(!cl_check_ocl20(false)){return;}
>    /* pipe write kernel */
>    OCL_CALL2(clCreatePipe, buf[0], ctx, 0, sz, w, NULL);
>    OCL_CALL(clGetPipeInfo, buf[0], CL_PIPE_MAX_PACKETS, sizeof(retnum), 
> &retnum, NULL);
> --- a/utests/utest_helper.hpp
> +++ b/utests/utest_helper.hpp
> @@ -310,8 +310,8 @@ extern clGetKernelSubGroupInfoKHR_cb* ut
>  /* Check if cl_intel_motion_estimation enabled. */  extern int 
> cl_check_motion_estimation(void);
>  
> -/* Check is cl version 2.0. */
> -extern int cl_check_ocl20(void);
> +/* Check is cl version 2.0. (if or_beignet is set, also accept any 
> +beignet device, whatever its CL version)*/ extern int 
> +cl_check_ocl20(bool or_beignet=true);
>  
>  /* Check is FP16 enabled. */
>  extern int cl_check_half(void);
> --- a/utests/utest_helper.cpp
> +++ b/utests/utest_helper.cpp
> @@ -939,13 +939,14 @@ int cl_check_subgroups_short(void)
>    return 1;
>  }
>  
> -int cl_check_ocl20(void)
> +int cl_check_ocl20(bool or_beignet)
>  {
>    size_t param_value_size;
>    size_t ret_sz;
>    OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_OPENCL_C_VERSION, 0, 0, 
> &param_value_size);
>    if(param_value_size == 0) {
>      printf("Not OpenCL 2.0 device, ");
> +    if(or_beignet){
>      if(cl_check_beignet()) {
>        printf("Beignet extension test!");
>        return 1;
> @@ -953,6 +954,10 @@ int cl_check_ocl20(void)
>        printf("Not beignet device , Skip!");
>        return 0;
>      }
> +    }else{
> +      printf("Skip!");
> +      return 0;
> +    }
>    }
>    char* device_version_str = (char* )malloc(param_value_size * sizeof(char) 
> );
>    OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_OPENCL_C_VERSION, 
> param_value_size, (void*)device_version_str, &ret_sz); @@ -961,6 +966,7 @@ 
> int cl_check_ocl20(void)
>    if(!strstr(device_version_str, "2.0")) {
>      free(device_version_str);
>      printf("Not OpenCL 2.0 device, ");
> +    if(or_beignet){
>      if(cl_check_beignet()) {
>        printf("Beignet extension test!");
>        return 1;
> @@ -968,6 +974,10 @@ int cl_check_ocl20(void)
>        printf("Not beignet device , Skip!");
>        return 0;
>      }
> +    }else{
> +      printf("Skip!");
> +      return 0;
> +    }
>    }
>    free(device_version_str);
>    return 1;
> --- a/docs/Beignet.mdwn
> +++ b/docs/Beignet.mdwn
> @@ -154,15 +154,9 @@ Supported Targets
>  
>  OpenCL 2.0
>  ----------
> -From release v1.3.0, beignet support OpenCL 2.0. By default, OpenCL 2.0 
> support is disabled, you can enable it when cmake with option 
> --DENABLE_OPENCL_20=1. Please remember that to enable OpenCL 2.0, there are 
> some dependencies. First, OpenCL 2.0 only support the targets -from Skylake, 
> include Skylake, Kabylake and Apollolake. Then, clang supports all OpenCL 2.0 
> feature from 3.9. So to enable OpenCL 2.0, -you must update LLVM/clang to 3.9 
> or later. And also requeires libdrm at least 2.4.66.
> -After enable OpenCL 2.0, beignet complies with OpenCL 2.0 spec, but some 
> OpenCL 2.0 features are simulated by software, there is no performance -gain, 
> such as pipe and device queues, especially device queues.
> -If you build beignet with OpenCL 2.0 enabled and your kernel don't use the 
> OpenCL 2.0 features, please pass a build option -cl-std=CL1.2 for 
> -performance, the OpenCL 2.0 uses more registers and has lots of int64 
> operations, which may hurt performance, and beignet will continue to improve 
> -OpenCL 2.0 performance.
> +From release v1.3.0, beignet supports OpenCL 2.0 on Skylake and later 
> hardware.
> +This requires LLVM/Clang 3.9 or later and libdrm 2.4.66 or later; if you do 
> not have these or do not want 2.0, you can disable it by passing 
> -DENABLE_OPENCL_20=0 to cmake.
> +As required by the OpenCL specification, kernels are compiled as OpenCL C 
> 1.2 by default; to use 2.0 they must explicitly request it with the 
> -cl-std=CL2.0 build option.  As OpenCL 2.0 is likely to be slower than 1.2, 
> we recommend that this is used only where needed.  (This is because 2.0 uses 
> more registers and has lots of int64 operations, and some of the 2.0 features 
> (pipes and especially device queues) are implemented in software so do not 
> provide any performance gain.)  Beignet will continue to improve OpenCL 2.0 
> performance.
>  
>  Known Issues
>  ------------
>
> _______________________________________________
> Beignet mailing list
> Beignet@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet


Attachment: signature.asc
Description: OpenPGP digital signature

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to