LGTM. Because 0x5A85 is a fused-down device, we will need to take care of the fused-down sub-slice after pooled EU is enabled.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Guo Yejun > Sent: Monday, September 12, 2016 15:02 > To: [email protected] > Cc: Guo, Yejun <[email protected]> > Subject: [Beignet] [PATCH V2] add another broxton pciid 0x5A85 > > v2: split the code relative to chv to another patch > Signed-off-by: Guo Yejun <[email protected]> > --- > src/cl_command_queue_gen7.c | 11 ++++++----- > src/cl_device_data.h | 4 +++- > src/cl_device_id.c | 23 ++++++++++++++++++++++- > src/cl_driver.h | 4 ++++ > src/cl_driver_defs.c | 1 + > src/intel/intel_driver.c | 10 ++++++++++ > 6 files changed, 46 insertions(+), 7 deletions(-) > > diff --git a/src/cl_command_queue_gen7.c > b/src/cl_command_queue_gen7.c index 6a9cf1f..b6a5920 100644 > --- a/src/cl_command_queue_gen7.c > +++ b/src/cl_command_queue_gen7.c > @@ -272,12 +272,13 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker) > assert(offset >= 0); > stack_sz *= interp_kernel_get_simd_width(ker->opaque); > stack_sz *= device->max_compute_unit * ctx->device- > >max_thread_per_unit; > - /* Because HSW calc stack offset per thread is relative with half slice, > when > - thread schedule in half slice is not balance, would out of bound. > Because > - the max half slice is 4 in GT4, multiply stack size with 4 for safe. > + > + /* for some hardware, part of EUs are disabled with EU id reserved, > + * it makes the active EU id larger than count of EUs within a subslice, > + * need to enlarge stack size for such case to avoid out of range. 
> */ > - if(cl_driver_get_ver(ctx->drv) == 75) > - stack_sz *= 4; > + cl_driver_enlarge_stack_size(ctx->drv, &stack_sz); > + > cl_gpgpu_set_stack(gpgpu, offset, stack_sz, BTI_PRIVATE); } > > diff --git a/src/cl_device_data.h b/src/cl_device_data.h index > f680219..30366ea 100644 > --- a/src/cl_device_data.h > +++ b/src/cl_device_data.h > @@ -298,9 +298,11 @@ > > /* BXT */ > #define PCI_CHIP_BROXTON_P 0x5A84 /* Intel(R) BXT-P for mobile > desktop */ > +#define PCI_CHIP_BROXTON_1 0x5A85 > > #define IS_BROXTON(devid) \ > - (devid == PCI_CHIP_BROXTON_P) > + (devid == PCI_CHIP_BROXTON_P || \ > + devid == PCI_CHIP_BROXTON_1) > > #define PCI_CHIP_KABYLAKE_ULT_GT1 0x5906 > #define PCI_CHIP_KABYLAKE_ULT_GT2 0x5916 > diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 34c182c..ce340c1 > 100644 > --- a/src/cl_device_id.c > +++ b/src/cl_device_id.c > @@ -195,6 +195,16 @@ static struct _cl_device_id intel_bxt_device = > { #include "cl_gen9_device.h" > }; > > +static struct _cl_device_id intel_bxt1_device = { > + .max_compute_unit = 12, > + .max_thread_per_unit = 6, > + .sub_slice_count = 2, > + .max_work_item_sizes = {512, 512, 512}, > + .max_work_group_size = 512, > + .max_clock_frequency = 1000, > +#include "cl_gen9_device.h" > +}; > + > static struct _cl_device_id intel_kbl_gt1_device = { > .max_compute_unit = 12, > .max_thread_per_unit = 7, > @@ -615,6 +625,16 @@ bxt_break: > cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); > break; > > + case PCI_CHIP_BROXTON_1: > + DECL_INFO_STRING(bxt1_break, intel_bxt1_device, name, "Intel(R) > +HD Graphics Broxton 1"); > +bxt1_break: > + intel_bxt1_device.device_id = device_id; > + intel_bxt1_device.platform = cl_get_platform_default(); > + ret = &intel_bxt1_device; > + cl_intel_platform_get_default_extension(ret); > + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); > + break; > + > case PCI_CHIP_KABYLAKE_ULT_GT1: > DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) > HD Graphics 
Kabylake ULT GT1"); > case PCI_CHIP_KABYLAKE_DT_GT1: > @@ -931,6 +951,7 @@ LOCAL cl_bool is_gen_device(cl_device_id device) { > device == &intel_skl_gt3_device || > device == &intel_skl_gt4_device || > device == &intel_bxt_device || > + device == &intel_bxt1_device || > device == &intel_kbl_gt1_device || > device == &intel_kbl_gt15_device || > device == &intel_kbl_gt2_device || @@ -1074,7 +1095,7 @@ > cl_device_get_version(cl_device_id device, cl_int *ver) > *ver = 8; > } else if (device == &intel_skl_gt1_device || device == > &intel_skl_gt2_device > || device == &intel_skl_gt3_device || device == &intel_skl_gt4_device > - || device == &intel_bxt_device || device == &intel_kbl_gt1_device > + || device == &intel_bxt_device || device == &intel_bxt1_device > + || device == &intel_kbl_gt1_device > || device == &intel_kbl_gt2_device || device == &intel_kbl_gt3_device > || device == &intel_kbl_gt4_device || device == > &intel_kbl_gt15_device) { > *ver = 9; > diff --git a/src/cl_driver.h b/src/cl_driver.h index 16730db..584be9d 100644 > --- a/src/cl_driver.h > +++ b/src/cl_driver.h > @@ -51,6 +51,10 @@ extern cl_driver_get_bufmgr_cb > *cl_driver_get_bufmgr; typedef uint32_t (cl_driver_get_ver_cb)(cl_driver); > extern cl_driver_get_ver_cb *cl_driver_get_ver; > > +/* enlarge stack size from the driver */ typedef void > +(cl_driver_enlarge_stack_size_cb)(cl_driver, int32_t*); extern > +cl_driver_enlarge_stack_size_cb *cl_driver_enlarge_stack_size; > + > typedef enum cl_self_test_res{ > SELF_TEST_PASS = 0, > SELF_TEST_SLM_FAIL = 1, > diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index > 31176a4..ea4e90a 100644 > --- a/src/cl_driver_defs.c > +++ b/src/cl_driver_defs.c > @@ -25,6 +25,7 @@ LOCAL cl_driver_new_cb *cl_driver_new = NULL; > LOCAL cl_driver_delete_cb *cl_driver_delete = NULL; LOCAL > cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL; LOCAL > cl_driver_get_ver_cb *cl_driver_get_ver = NULL; > +LOCAL cl_driver_enlarge_stack_size_cb 
*cl_driver_enlarge_stack_size = > +NULL; > LOCAL cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag = NULL; > LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL; LOCAL > cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL; > diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index > e561725..ec2fb31 100644 > --- a/src/intel/intel_driver.c > +++ b/src/intel/intel_driver.c > @@ -464,6 +464,15 @@ intel_driver_get_ver(struct intel_driver *drv) } > > static void > +intel_driver_enlarge_stack_size(struct intel_driver *drv, int32_t > +*stack_size) { > + if (drv->gen_ver == 75) > + *stack_size = *stack_size * 4; > + else if (drv->device_id == PCI_CHIP_BROXTON_1) > + *stack_size = *stack_size * 2; > +} > + > +static void > intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag) { > drv->atomic_test_result = atomic_flag; @@ -921,6 +930,7 @@ > intel_setup_callbacks(void) > cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new; > cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete; > cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver; > + cl_driver_enlarge_stack_size = (cl_driver_enlarge_stack_size_cb *) > + intel_driver_enlarge_stack_size; > cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *) > intel_driver_set_atomic_flag; > cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) > intel_driver_get_bufmgr; > cl_driver_get_device_id = (cl_driver_get_device_id_cb *) > intel_get_device_id; > -- > 2.7.4 > > _______________________________________________ > Beignet mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
