Correct struct gen8_interface_descriptor. Add function intel_gpgpu_build_idrt_gen9 for the different SLM size setting. Disable SKL's global barrier for now.
Signed-off-by: Yang Rong <rong.r.y...@intel.com> --- src/intel/intel_gpgpu.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- src/intel/intel_structs.h | 5 +++-- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index cd45ff9..c02a95c 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -1433,6 +1433,50 @@ intel_gpgpu_build_idrt_gen8(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) desc->desc6.slm_sz = slm_sz; } +static void +intel_gpgpu_build_idrt_gen9(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) +{ + gen8_interface_descriptor_t *desc; + + desc = (gen8_interface_descriptor_t*) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.idrt_offset); + + memset(desc, 0, sizeof(*desc)); + desc->desc0.kernel_start_pointer = 0; /* reloc */ + desc->desc2.single_program_flow = 0; + desc->desc2.floating_point_mode = 0; /* use IEEE-754 rule */ + desc->desc6.rounding_mode = 0; /* round to nearest even */ + + assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) % 32 == 0); + desc->desc3.sampler_state_pointer = gpgpu->aux_offset.sampler_state_offset >> 5; + desc->desc4.binding_table_entry_count = 0; /* no prefetch */ + desc->desc4.binding_table_pointer = 0; + desc->desc5.curbe_read_len = kernel->curbe_sz / 32; + desc->desc5.curbe_read_offset = 0; + + /* Barriers / SLM are automatically handled on Gen7+ */ + size_t slm_sz = kernel->slm_sz; + /* group_threads_num should not be set to 0 even if the barrier is disabled per bspec */ + desc->desc6.group_threads_num = kernel->thread_n; + desc->desc6.barrier_enable = kernel->use_slm; + if (slm_sz == 0) + slm_sz = 0; + else if (slm_sz <= 1*KB) + slm_sz = 1; + else if (slm_sz <= 2*KB) + slm_sz = 2; + else if (slm_sz <= 4*KB) + slm_sz = 3; + else if (slm_sz <= 8*KB) + slm_sz = 4; + else if (slm_sz <= 16*KB) + slm_sz = 5; + else if (slm_sz <= 32*KB) + slm_sz = 6; + else + slm_sz = 7; + desc->desc6.slm_sz = slm_sz; +} + static int 
intel_gpgpu_upload_curbes(intel_gpgpu_t *gpgpu, const void* data, uint32_t size) { @@ -2040,7 +2084,7 @@ intel_set_gpgpu_callbacks(int device_id) intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen8; intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen8; cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen8; - intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen8; + intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen9; intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8; intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8; cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8; diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h index b4635f4..fd6a82b 100644 --- a/src/intel/intel_structs.h +++ b/src/intel/intel_structs.h @@ -142,8 +142,9 @@ typedef struct gen8_interface_descriptor } desc5; struct { - uint32_t group_threads_num:8; /* 0..64, 0 - no barrier use */ - uint32_t barrier_return_byte:8; + uint32_t group_threads_num:10; /* 0..64, 0 - no barrier use */ + uint32_t pad:5; + uint32_t global_barrier_enable:1; uint32_t slm_sz:5; /* 0..16 - 0K..64K */ uint32_t barrier_enable:1; uint32_t rounding_mode:2; -- 2.1.0 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet