Please ignore this version; I forgot to commit the new code. A new version was sent a few minutes ago.
On Tue, Jul 01, 2014 at 02:54:07PM +0800, Zhigang Gong wrote: > According to spec, different platforms have different curbe > allocation restriction. The previous code set the curbe > allocated size to 480 statically which is not correct. > > This patch changes to always set the curbe entry num to 64 > which is the maximum work group size. And set proper curbe > allocation size according to the platform's hard limitation > and a relatively reasonable kernel argument usage limitation. > > v2: > when we call load_vfe_state, we already know the exact constant urb > size used in the current kernel. We could choose a smallest valid curbe > size for this kernel. And if the size exceeds the hardware limitation, > we report it as a warning here. > > Signed-off-by: Zhigang Gong <[email protected]> > --- > src/cl_gt_device.h | 2 +- > src/intel/intel_gpgpu.c | 41 ++++++++++++++++++++++++++++------------- > 2 files changed, 29 insertions(+), 14 deletions(-) > > diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h > index 63c9047..97ba7e2 100644 > --- a/src/cl_gt_device.h > +++ b/src/cl_gt_device.h > @@ -39,7 +39,7 @@ > .address_bits = 32, > .max_mem_alloc_size = 256 * 1024 * 1024, > .image_support = CL_TRUE, > -.max_read_image_args = 128, > +.max_read_image_args = 16, > .max_write_image_args = 8, > .image_max_array_size = 2048, > .image2d_max_width = 8192, > diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c > index d403aa0..48e2769 100644 > --- a/src/intel/intel_gpgpu.c > +++ b/src/intel/intel_gpgpu.c > @@ -116,7 +116,7 @@ struct intel_gpgpu > struct { > uint32_t num_cs_entries; > uint32_t size_cs_entry; /* size of one entry in 512bit elements */ > - } urb; > + } curb; > > uint32_t max_threads; /* max threads requested by the user */ > }; > @@ -275,6 +275,22 @@ uint32_t intel_gpgpu_get_scratch_index_gen75(uint32_t > size) { > return index; > } > > +#define MAX_KERNEL_ARG_SIZE (32 * 4 + 24 * 4 + 5 * 64) * 64 // 32 integer > arguments, 24 uniform special register 
and 5 vector special register. > + > +LOCAL cl_int > +cl_get_max_curbe_size(uint32_t device_id) > +{ > + int max_curbe_size; > + if (IS_BAYTRAIL_T(device_id) || > + IS_IVB_GT1(device_id)) > + max_curbe_size = 992; > + else > + max_curbe_size = 2016; > + > + return (max_curbe_size*32) > MAX_KERNEL_ARG_SIZE ? > + (MAX_KERNEL_ARG_SIZE / 32) : max_curbe_size; > +} > + > static void > intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu) > { > @@ -293,10 +309,10 @@ intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu) > OUT_BATCH(gpgpu->batch, 0); > } > /* max_thread | urb entries | (reset_gateway|bypass_gate_way | gpgpu_mode) > */ > - OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (64 << 8) | > 0xc4); > + OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (0 << 8) | > 0xc4); > OUT_BATCH(gpgpu->batch, 0); > /* curbe_size */ > - OUT_BATCH(gpgpu->batch, 480); > + OUT_BATCH(gpgpu->batch, cl_get_max_curbe_size(gpgpu->drv->device_id)); > OUT_BATCH(gpgpu->batch, 0); > OUT_BATCH(gpgpu->batch, 0); > OUT_BATCH(gpgpu->batch, 0); > @@ -306,17 +322,16 @@ intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu) > static void > intel_gpgpu_load_curbe_buffer(intel_gpgpu_t *gpgpu) > { > + int curbe_size = gpgpu->curb.size_cs_entry * gpgpu->curb.num_cs_entries; > + if (curbe_size > cl_get_max_curbe_size(gpgpu->drv->device_id)) { > + curbe_size = cl_get_max_curbe_size(gpgpu->drv->device_id); > + fprintf(stderr, "warning, curbe size exceed limitation.\n"); > + } > BEGIN_BATCH(gpgpu->batch, 4); > OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2)); /* length-2 */ > OUT_BATCH(gpgpu->batch, 0); /* mbz */ > -// XXX > -#if 1 > OUT_BATCH(gpgpu->batch, > - gpgpu->urb.size_cs_entry* > - gpgpu->urb.num_cs_entries*32); > -#else > - OUT_BATCH(gpgpu->batch, 5120); > -#endif > + curbe_size * 32); > OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, > gpgpu->aux_offset.curbe_offset); > ADVANCE_BATCH(gpgpu->batch); > } > @@ -577,8 +592,8 @@ 
intel_gpgpu_state_init(intel_gpgpu_t *gpgpu, > gpgpu->sampler_bitmap = ~((1 << max_sampler_n) - 1); > > /* URB */ > - gpgpu->urb.num_cs_entries = max_threads; > - gpgpu->urb.size_cs_entry = size_cs_entry; > + gpgpu->curb.num_cs_entries = 64; > + gpgpu->curb.size_cs_entry = size_cs_entry; > gpgpu->max_threads = max_threads; > > if (gpgpu->printf_b.ibo) > @@ -616,7 +631,7 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu, > //curbe must be 32 bytes aligned > size_aux = ALIGN(size_aux, 32); > gpgpu->aux_offset.curbe_offset = size_aux; > - size_aux += gpgpu->urb.num_cs_entries * gpgpu->urb.size_cs_entry * 64; > + size_aux += gpgpu->curb.num_cs_entries * gpgpu->curb.size_cs_entry * 32; > > //idrt must be 32 bytes aligned > size_aux = ALIGN(size_aux, 32); > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
