From: Junyan He <junyan...@linux.intel.com> Signed-off-by: Junyan He <junyan...@linux.intel.com> --- src/cl_device_id.c | 123 ++++++++++++++++++++++++++++++--------------------- src/cl_device_id.h | 1 + src/cl_extensions.c | 29 ++++++++++-- src/cl_extensions.h | 2 + src/cl_gt_device.h | 1 + src/cl_platform_id.c | 2 +- 6 files changed, 102 insertions(+), 56 deletions(-)
diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 215f7f2..09171f8 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -26,6 +26,7 @@ #include "cl_khr_icd.h" #include "cl_thread.h" #include "CL/cl.h" +#include "CL/cl_ext.h" #include "cl_gbe_loader.h" #include "cl_alloc.h" @@ -398,6 +399,8 @@ baytrail_t_device_break: case PCI_CHIP_BROADWLL_U_GT1: DECL_INFO_STRING(brw_gt1_break, intel_brw_gt1_device, name, "Intel(R) HD Graphics BroadWell ULX GT1"); brw_gt1_break: + /* For Gen8 and later, half float is supported and we will enable cl_khr_fp16. */ + cl_intel_platform_enable_fp16_extension(intel_platform); intel_brw_gt1_device.vendor_id = device_id; intel_brw_gt1_device.platform = intel_platform; ret = &intel_brw_gt1_device; @@ -414,6 +417,7 @@ brw_gt1_break: case PCI_CHIP_BROADWLL_U_GT2: DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell ULX GT2"); brw_gt2_break: + cl_intel_platform_enable_fp16_extension(intel_platform); intel_brw_gt2_device.vendor_id = device_id; intel_brw_gt2_device.platform = intel_platform; ret = &intel_brw_gt2_device; @@ -430,6 +434,7 @@ brw_gt2_break: case PCI_CHIP_BROADWLL_U_GT3: DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell ULX GT2"); brw_gt3_break: + cl_intel_platform_enable_fp16_extension(intel_platform); intel_brw_gt3_device.vendor_id = device_id; intel_brw_gt3_device.platform = intel_platform; ret = &intel_brw_gt3_device; @@ -447,61 +452,65 @@ chv_break: break; - case PCI_CHIP_SKYLAKE_ULT_GT1: - DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULT GT1"); - case PCI_CHIP_SKYLAKE_ULX_GT1: - DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULX GT1"); - case PCI_CHIP_SKYLAKE_DT_GT1: - DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Desktop GT1"); - case PCI_CHIP_SKYLAKE_HALO_GT1: - DECL_INFO_STRING(skl_gt1_break, 
intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Halo GT1"); - case PCI_CHIP_SKYLAKE_SRV_GT1: - DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Server GT1"); + case PCI_CHIP_SKYLAKE_ULT_GT1: + DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULT GT1"); + case PCI_CHIP_SKYLAKE_ULX_GT1: + DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULX GT1"); + case PCI_CHIP_SKYLAKE_DT_GT1: + DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Desktop GT1"); + case PCI_CHIP_SKYLAKE_HALO_GT1: + DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Halo GT1"); + case PCI_CHIP_SKYLAKE_SRV_GT1: + DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Server GT1"); skl_gt1_break: - intel_skl_gt1_device.vendor_id = device_id; - intel_skl_gt1_device.platform = intel_platform; - ret = &intel_skl_gt1_device; - break; - - case PCI_CHIP_SKYLAKE_ULT_GT2: - DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2"); - case PCI_CHIP_SKYLAKE_ULT_GT2F: - DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2F"); - case PCI_CHIP_SKYLAKE_ULX_GT2: - DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULX GT2"); - case PCI_CHIP_SKYLAKE_DT_GT2: - DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Desktop GT2"); - case PCI_CHIP_SKYLAKE_HALO_GT2: - DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Halo GT2"); - case PCI_CHIP_SKYLAKE_SRV_GT2: - DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Server GT2"); + cl_intel_platform_enable_fp16_extension(intel_platform); + intel_skl_gt1_device.vendor_id = device_id; + 
intel_skl_gt1_device.platform = intel_platform; + ret = &intel_skl_gt1_device; + break; + + case PCI_CHIP_SKYLAKE_ULT_GT2: + DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2"); + case PCI_CHIP_SKYLAKE_ULT_GT2F: + DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2F"); + case PCI_CHIP_SKYLAKE_ULX_GT2: + DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULX GT2"); + case PCI_CHIP_SKYLAKE_DT_GT2: + DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Desktop GT2"); + case PCI_CHIP_SKYLAKE_HALO_GT2: + DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Halo GT2"); + case PCI_CHIP_SKYLAKE_SRV_GT2: + DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Server GT2"); skl_gt2_break: - intel_skl_gt2_device.vendor_id = device_id; - intel_skl_gt2_device.platform = intel_platform; - ret = &intel_skl_gt2_device; - break; - - case PCI_CHIP_SKYLAKE_ULT_GT3: - DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake ULT GT3"); - case PCI_CHIP_SKYLAKE_HALO_GT3: - DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Halo GT3"); - case PCI_CHIP_SKYLAKE_SRV_GT3: - DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Server GT3"); + cl_intel_platform_enable_fp16_extension(intel_platform); + intel_skl_gt2_device.vendor_id = device_id; + intel_skl_gt2_device.platform = intel_platform; + ret = &intel_skl_gt2_device; + break; + + case PCI_CHIP_SKYLAKE_ULT_GT3: + DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake ULT GT3"); + case PCI_CHIP_SKYLAKE_HALO_GT3: + DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Halo GT3"); + case PCI_CHIP_SKYLAKE_SRV_GT3: + 
DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Server GT3"); skl_gt3_break: - intel_skl_gt3_device.vendor_id = device_id; - intel_skl_gt3_device.platform = intel_platform; - ret = &intel_skl_gt3_device; - break; - - case PCI_CHIP_SKYLAKE_HALO_GT4: - DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Halo GT4"); - case PCI_CHIP_SKYLAKE_SRV_GT4: - DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Server GT4"); + cl_intel_platform_enable_fp16_extension(intel_platform); + intel_skl_gt3_device.vendor_id = device_id; + intel_skl_gt3_device.platform = intel_platform; + ret = &intel_skl_gt3_device; + break; + + case PCI_CHIP_SKYLAKE_HALO_GT4: + DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Halo GT4"); + case PCI_CHIP_SKYLAKE_SRV_GT4: + DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Server GT4"); skl_gt4_break: - intel_skl_gt4_device.vendor_id = device_id; - intel_skl_gt4_device.platform = intel_platform; - ret = &intel_skl_gt4_device; - break; + cl_intel_platform_enable_fp16_extension(intel_platform); + intel_skl_gt4_device.vendor_id = device_id; + intel_skl_gt4_device.platform = intel_platform; + ret = &intel_skl_gt4_device; + break; case PCI_CHIP_SANDYBRIDGE_BRIDGE: case PCI_CHIP_SANDYBRIDGE_GT1: @@ -626,6 +635,17 @@ cl_get_device_ids(cl_platform_id platform, { cl_device_id device; + /* The spec allows platform to be NULL; if platform + is NULL, the behavior is implementation-defined. + We cannot init the device before platform init. */ + if (!platform) { + if (num_devices) + *num_devices = 0; + if (devices) + *devices = 0; + return CL_DEVICE_NOT_FOUND; + } + /* Do we have a usable device? 
*/ device = cl_get_gt_device(); if (device && cl_self_test(device)) { @@ -747,6 +767,7 @@ cl_get_device_info(cl_device_id device, DECL_FIELD(MEM_BASE_ADDR_ALIGN, mem_base_addr_align) DECL_FIELD(MIN_DATA_TYPE_ALIGN_SIZE, min_data_type_align_size) DECL_FIELD(SINGLE_FP_CONFIG, single_fp_config) + DECL_FIELD(HALF_FP_CONFIG, half_fp_config) DECL_FIELD(DOUBLE_FP_CONFIG, double_fp_config) DECL_FIELD(GLOBAL_MEM_CACHE_TYPE, global_mem_cache_type) DECL_FIELD(GLOBAL_MEM_CACHELINE_SIZE, global_mem_cache_line_size) diff --git a/src/cl_device_id.h b/src/cl_device_id.h index ee6a8e6..cde0160 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -66,6 +66,7 @@ struct _cl_device_id { cl_uint mem_base_addr_align; cl_uint min_data_type_align_size; cl_device_fp_config single_fp_config; + cl_device_fp_config half_fp_config; cl_device_fp_config double_fp_config; cl_device_mem_cache_type global_mem_cache_type; cl_uint global_mem_cache_line_size; diff --git a/src/cl_extensions.c b/src/cl_extensions.c index adcf82e..14ac726 100644 --- a/src/cl_extensions.c +++ b/src/cl_extensions.c @@ -11,6 +11,7 @@ #include <stdlib.h> #include <string.h> +#include <assert.h> static struct cl_extensions intel_extensions = { @@ -90,12 +91,31 @@ process_extension_str(cl_extensions_t *extensions) } } +static int ext_initialized = 0; + LOCAL void -cl_intel_platform_extension_init(cl_platform_id intel_platform) +cl_intel_platform_enable_fp16_extension(cl_platform_id intel_platform) { - static int initialized = 0; + cl_extensions_t *extensions = &intel_extensions; + int id; + assert(ext_initialized); + + for(id = OPT1_EXT_START_ID; id <= OPT1_EXT_END_ID; id++) + { + if (id == EXT_ID(khr_fp16)) + extensions->extensions[id].base.ext_enabled = 1; + } - if (initialized) { + process_extension_str(extensions); + intel_platform->internal_extensions = &intel_extensions; + intel_platform->extensions = intel_extensions.ext_str; + intel_platform->extensions_sz = strlen(intel_platform->extensions) + 1; +} + +LOCAL 
void +cl_intel_platform_extension_init(cl_platform_id intel_platform) +{ + if (ext_initialized) { intel_platform->internal_extensions = &intel_extensions; intel_platform->extensions = intel_extensions.ext_str; return; @@ -108,7 +128,8 @@ cl_intel_platform_extension_init(cl_platform_id intel_platform) intel_platform->internal_extensions = &intel_extensions; intel_platform->extensions = intel_extensions.ext_str; + intel_platform->extensions_sz = strlen(intel_platform->extensions) + 1; - initialized = 1; + ext_initialized = 1; return; } diff --git a/src/cl_extensions.h b/src/cl_extensions.h index e6cdce8..b1154a2 100644 --- a/src/cl_extensions.h +++ b/src/cl_extensions.h @@ -94,3 +94,5 @@ typedef struct cl_extensions { extern void cl_intel_platform_extension_init(cl_platform_id intel_platform); +extern void +cl_intel_platform_enable_fp16_extension(cl_platform_id intel_platform); diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index 0950327..4b43c20 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -75,6 +75,7 @@ .platform = NULL, /* == intel_platform (set when requested) */ /* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */ .single_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST , /* IEEE 754. */ +.half_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST , .printf_buffer_size = 1 * 1024 * 1024, .interop_user_sync = CL_TRUE, diff --git a/src/cl_platform_id.c b/src/cl_platform_id.c index a97c00f..bc2d799 100644 --- a/src/cl_platform_id.c +++ b/src/cl_platform_id.c @@ -56,7 +56,7 @@ cl_get_platform_ids(cl_uint num_entries, /* Easy right now, only one platform is supported */ if(platforms) *platforms = intel_platform; - intel_platform->extensions_sz = strlen(intel_platform->extensions) + 1; + return CL_SUCCESS; } -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet