From: "Yang, Rong R" <[email protected]> Kabylake is almost the same as Skylake, so use the Skylake functions directly.
Signed-off-by: Yang Rong <[email protected]> --- src/cl_command_queue.c | 2 +- src/cl_device_id.c | 224 +++++++++++++++++++++++++++++++++++++---------- src/cl_gen10_device.h | 31 +++++++ src/intel/intel_driver.c | 4 +- src/intel/intel_gpgpu.c | 2 +- 5 files changed, 213 insertions(+), 50 deletions(-) create mode 100644 src/cl_gen10_device.h diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index b66928f..07c5d89 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -222,7 +222,7 @@ cl_command_queue_ND_range(cl_command_queue queue, /* Check that the user did not forget any argument */ TRY (cl_kernel_check_args, k); - if (ver == 7 || ver == 75 || ver == 8 || ver == 9) + if (ver == 7 || ver == 75 || ver == 8 || ver == 9 || ver == 10) TRY (cl_command_queue_ND_range_gen7, queue, k, work_dim, global_wk_off, global_wk_sz, local_wk_sz); else FATAL ("Unknown Gen Device"); diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 66666ea..35a19a6 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -210,6 +210,61 @@ static struct _cl_device_id intel_bxt_device = { #include "cl_gen9_device.h" }; +static struct _cl_device_id intel_kbl_gt1_device = { + INIT_ICD(dispatch) + .max_compute_unit = 12, + .max_thread_per_unit = 7, + .sub_slice_count = 2, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, + .max_clock_frequency = 1000, +#include "cl_gen10_device.h" +}; + +static struct _cl_device_id intel_kbl_gt15_device = { + INIT_ICD(dispatch) + .max_compute_unit = 18, + .max_thread_per_unit = 7, + .sub_slice_count = 3, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, + .max_clock_frequency = 1000, +#include "cl_gen10_device.h" +}; + +static struct _cl_device_id intel_kbl_gt2_device = { + INIT_ICD(dispatch) + .max_compute_unit = 24, + .max_thread_per_unit = 7, + .sub_slice_count = 3, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, + .max_clock_frequency = 1000, +#include 
"cl_gen10_device.h" +}; + +static struct _cl_device_id intel_kbl_gt3_device = { + INIT_ICD(dispatch) + .max_compute_unit = 48, + .max_thread_per_unit = 7, + .sub_slice_count = 6, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, + .max_clock_frequency = 1000, +#include "cl_gen10_device.h" +}; + +static struct _cl_device_id intel_kbl_gt4_device = { + INIT_ICD(dispatch) + .max_compute_unit = 72, + .max_thread_per_unit = 7, + .sub_slice_count = 9, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, + .max_clock_frequency = 1000, +#include "cl_gen10_device.h" +}; + LOCAL cl_device_id cl_get_gt_device(void) { @@ -580,6 +635,98 @@ bxt_break: cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); break; + case PCI_CHIP_KABYLAKE_ULT_GT1: + DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake ULT GT1"); + case PCI_CHIP_KABYLAKE_DT_GT1: + DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake Desktop GT1"); + case PCI_CHIP_KABYLAKE_HALO_GT1: + DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake Halo GT1"); + case PCI_CHIP_KABYLAKE_ULX_GT1: + DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake ULX GT1"); + case PCI_CHIP_KABYLAKE_SRV_GT1: + DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake Server GT1"); +kbl_gt1_break: + intel_kbl_gt1_device.device_id = device_id; + intel_kbl_gt1_device.platform = cl_get_platform_default(); + ret = &intel_kbl_gt1_device; +#ifdef ENABLE_FP64 + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); +#endif + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + break; + + case PCI_CHIP_KABYLAKE_ULT_GT15: + DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) HD Graphics Kabylake ULT GT1.5"); + case PCI_CHIP_KABYLAKE_DT_GT15: + 
DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) HD Graphics Kabylake Desktop GT1.5"); + case PCI_CHIP_KABYLAKE_HALO_GT15: + DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) HD Graphics Kabylake Halo GT1.5"); + case PCI_CHIP_KABYLAKE_ULX_GT15: + DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) HD Graphics Kabylake ULX GT1.5"); +kbl_gt15_break: + intel_kbl_gt15_device.device_id = device_id; + intel_kbl_gt15_device.platform = cl_get_platform_default(); + ret = &intel_kbl_gt15_device; +#ifdef ENABLE_FP64 + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); +#endif + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + break; + + case PCI_CHIP_KABYLAKE_ULT_GT2: + case PCI_CHIP_KABYLAKE_ULT_GT2_1: + DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake ULT GT2"); + case PCI_CHIP_KABYLAKE_DT_GT2: + DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake Desktop GT2"); + case PCI_CHIP_KABYLAKE_HALO_GT2: + DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake Halo GT2"); + case PCI_CHIP_KABYLAKE_ULX_GT2: + DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake ULX GT2"); + case PCI_CHIP_KABYLAKE_SRV_GT2: + DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake Server GT2"); + case PCI_CHIP_KABYLAKE_WKS_GT2: + DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake Workstation GT2"); +kbl_gt2_break: + intel_kbl_gt2_device.device_id = device_id; + intel_kbl_gt2_device.platform = cl_get_platform_default(); + ret = &intel_kbl_gt2_device; +#ifdef ENABLE_FP64 + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); +#endif + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, 
cl_khr_fp16_ext_id); + break; + + case PCI_CHIP_KABYLAKE_ULT_GT3: + case PCI_CHIP_KABYLAKE_ULT_GT3_1: + case PCI_CHIP_KABYLAKE_ULT_GT3_2: + DECL_INFO_STRING(kbl_gt3_break, intel_kbl_gt3_device, name, "Intel(R) HD Graphics Kabylake ULT GT3"); +kbl_gt3_break: + intel_kbl_gt3_device.device_id = device_id; + intel_kbl_gt3_device.platform = cl_get_platform_default(); + ret = &intel_kbl_gt3_device; +#ifdef ENABLE_FP64 + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); +#endif + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + break; + + case PCI_CHIP_KABYLAKE_HALO_GT4: + DECL_INFO_STRING(kbl_gt4_break, intel_kbl_gt4_device, name, "Intel(R) HD Graphics Kabylake ULT GT4"); +kbl_gt4_break: + intel_kbl_gt4_device.device_id = device_id; + intel_kbl_gt4_device.platform = cl_get_platform_default(); + ret = &intel_kbl_gt4_device; +#ifdef ENABLE_FP64 + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); +#endif + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + break; + case PCI_CHIP_SANDYBRIDGE_BRIDGE: case PCI_CHIP_SANDYBRIDGE_GT1: case PCI_CHIP_SANDYBRIDGE_GT2: @@ -787,6 +934,29 @@ cl_get_device_ids(cl_platform_id platform, memcpy(param_value, device->FIELD, device->JOIN(FIELD,_sz)); \ return CL_SUCCESS; +LOCAL cl_bool is_gen_device(cl_device_id device) { + return device == &intel_ivb_gt1_device || + device == &intel_ivb_gt2_device || + device == &intel_baytrail_t_device || + device == &intel_hsw_gt1_device || + device == &intel_hsw_gt2_device || + device == &intel_hsw_gt3_device || + device == &intel_brw_gt1_device || + device == &intel_brw_gt2_device || + device == &intel_brw_gt3_device || + device == &intel_chv_device || + device == &intel_skl_gt1_device || + device == &intel_skl_gt2_device || + device == &intel_skl_gt3_device || + device == &intel_skl_gt4_device || + device == &intel_bxt_device || + device == &intel_kbl_gt1_device 
|| + device == &intel_kbl_gt15_device || + device == &intel_kbl_gt2_device || + device == &intel_kbl_gt3_device || + device == &intel_kbl_gt4_device; +} + LOCAL cl_int cl_get_device_info(cl_device_id device, cl_device_info param_name, @@ -794,22 +964,7 @@ cl_get_device_info(cl_device_id device, void * param_value, size_t * param_value_size_ret) { - if (UNLIKELY(device != &intel_ivb_gt1_device && - device != &intel_ivb_gt2_device && - device != &intel_baytrail_t_device && - device != &intel_hsw_gt1_device && - device != &intel_hsw_gt2_device && - device != &intel_hsw_gt3_device && - device != &intel_brw_gt1_device && - device != &intel_brw_gt2_device && - device != &intel_brw_gt3_device && - device != &intel_chv_device && - device != &intel_skl_gt1_device && - device != &intel_skl_gt2_device && - device != &intel_skl_gt3_device && - device != &intel_skl_gt4_device && - device != &intel_bxt_device - )) + if (UNLIKELY(is_gen_device(device) == CL_FALSE)) return CL_INVALID_DEVICE; /* Find the correct parameter */ @@ -909,22 +1064,7 @@ cl_get_device_info(cl_device_id device, LOCAL cl_int cl_device_get_version(cl_device_id device, cl_int *ver) { - if (UNLIKELY(device != &intel_ivb_gt1_device && - device != &intel_ivb_gt2_device && - device != &intel_baytrail_t_device && - device != &intel_hsw_gt1_device && - device != &intel_hsw_gt2_device && - device != &intel_hsw_gt3_device && - device != &intel_brw_gt1_device && - device != &intel_brw_gt2_device && - device != &intel_brw_gt3_device && - device != &intel_chv_device && - device != &intel_skl_gt1_device && - device != &intel_skl_gt2_device && - device != &intel_skl_gt3_device && - device != &intel_skl_gt4_device && - device != &intel_bxt_device - )) + if (UNLIKELY(is_gen_device(device) == CL_FALSE)) return CL_INVALID_DEVICE; if (ver == NULL) return CL_SUCCESS; @@ -942,6 +1082,10 @@ cl_device_get_version(cl_device_id device, cl_int *ver) || device == &intel_skl_gt3_device || device == &intel_skl_gt4_device || device == 
&intel_bxt_device) { *ver = 9; + } else if (device == &intel_kbl_gt1_device || device == &intel_kbl_gt2_device + || device == &intel_kbl_gt3_device || device == &intel_kbl_gt4_device + || device == &intel_kbl_gt15_device) { + *ver = 10; } else return CL_INVALID_VALUE; @@ -1015,21 +1159,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, { int err = CL_SUCCESS; int dimension = 0; - if (UNLIKELY(device != &intel_ivb_gt1_device && - device != &intel_ivb_gt2_device && - device != &intel_baytrail_t_device && - device != &intel_hsw_gt1_device && - device != &intel_hsw_gt2_device && - device != &intel_hsw_gt3_device && - device != &intel_brw_gt1_device && - device != &intel_brw_gt2_device && - device != &intel_brw_gt3_device && - device != &intel_chv_device && - device != &intel_skl_gt1_device && - device != &intel_skl_gt2_device && - device != &intel_skl_gt3_device && - device != &intel_skl_gt4_device && - device != &intel_bxt_device)) + if (UNLIKELY(is_gen_device(device) == CL_FALSE)) return CL_INVALID_DEVICE; CHECK_KERNEL(kernel); diff --git a/src/cl_gen10_device.h b/src/cl_gen10_device.h new file mode 100644 index 0000000..4d961eb --- /dev/null +++ b/src/cl_gen10_device.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Author: Benjamin Segovia <[email protected]> + */ + +/* Common fields for both KBL devices */ +.max_parameter_size = 1024, +.global_mem_cache_line_size = 64, /* XXX */ +.global_mem_cache_size = 8 << 10, /* XXX */ +.local_mem_type = CL_GLOBAL, +.local_mem_size = 64 << 10, +.scratch_mem_size = 2 << 20, +.max_mem_alloc_size = 4 * 1024 * 1024 * 1024ul, +.global_mem_size = 4 * 1024 * 1024 * 1024ul, + +#include "cl_gt_device.h" + diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index e561725..a74d936 100644 --- a/src/intel/intel_driver.c +++ b/src/intel/intel_driver.c @@ -169,7 +169,9 @@ intel_driver_init(intel_driver_t *driver, int dev_fd) else FATAL ("Unsupported Gen for emulation"); #else - if (IS_GEN9(driver->device_id)) + if (IS_GEN10(driver->device_id)) + driver->gen_ver = 10; + else if (IS_GEN9(driver->device_id)) driver->gen_ver = 9; else if (IS_GEN8(driver->device_id)) driver->gen_ver = 8; diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index db967e8..b80feda 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -2467,7 +2467,7 @@ intel_set_gpgpu_callbacks(int device_id) intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen7; return; } - if (IS_SKYLAKE(device_id) || IS_BROXTON(device_id)) { + if (IS_SKYLAKE(device_id) || IS_BROXTON(device_id) || IS_KABYLAKE(device_id)) { cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9; intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8; cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9; -- 2.1.4 _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
