LGTM, just pushed, thanks.
On Fri, Dec 26, 2014 at 02:57:58PM +0800, Yang Rong wrote: > From: Luo Xionghu <[email protected]> > > HSW has a limitation when issuing a PIPECONTROL with RO Cache Invalidation: > Prior to programming a PIPECONTROL command with any of the RO cache > invalidation bit set, > program a PIPECONTROL flush command with CS stall bit and HDC Flush bit set. > > So two PIPECONTROL commands must be used to flush and invalidate the L3 cache on HSW. > This patch fixes some random failures seen on HSW in workloads with very heavy DC read/write. > --- > src/cl_command_queue_gen7.c | 2 +- > src/intel/intel_gpgpu.c | 36 +++++++++++++++++++++++++++++++++++- > 2 files changed, 36 insertions(+), 2 deletions(-) > > diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c > index ba015ca..734267a 100644 > --- a/src/cl_command_queue_gen7.c > +++ b/src/cl_command_queue_gen7.c > @@ -31,7 +31,7 @@ > #include <string.h> > > #define MAX_GROUP_SIZE_IN_HALFSLICE 512 > -static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return > 256+128; } > +static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return > 256+256; } > > /* "Varing" payload is the part of the curbe that changes accross threads in > the > * same work group. 
Right now, it consists in local IDs and block IPs > diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c > index c80a11b..3471be0 100644 > --- a/src/intel/intel_gpgpu.c > +++ b/src/intel/intel_gpgpu.c > @@ -104,6 +104,9 @@ intel_gpgpu_load_curbe_buffer_t > *intel_gpgpu_load_curbe_buffer = NULL; > typedef void (intel_gpgpu_load_idrt_t)(intel_gpgpu_t *gpgpu); > intel_gpgpu_load_idrt_t *intel_gpgpu_load_idrt = NULL; > > +typedef void (intel_gpgpu_pipe_control_t)(intel_gpgpu_t *gpgpu); > +intel_gpgpu_pipe_control_t *intel_gpgpu_pipe_control = NULL; > + > static void > intel_gpgpu_sync(void *buf) > { > @@ -542,7 +545,7 @@ intel_gpgpu_write_timestamp(intel_gpgpu_t *gpgpu, int idx) > } > > static void > -intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu) > +intel_gpgpu_pipe_control_gen7(intel_gpgpu_t *gpgpu) > { > gen6_pipe_control_t* pc = (gen6_pipe_control_t*) > intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t)); > @@ -561,6 +564,34 @@ intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu) > } > > static void > +intel_gpgpu_pipe_control_gen75(intel_gpgpu_t *gpgpu) > +{ > + gen6_pipe_control_t* pc = (gen6_pipe_control_t*) > + intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t)); > + memset(pc, 0, sizeof(*pc)); > + pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2; > + pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL; > + pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL; > + pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D; > + pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX; > + pc->dw1.cs_stall = 1; > + pc->dw1.dc_flush_enable = 1; > + > + pc = (gen6_pipe_control_t*) > + intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t)); > + memset(pc, 0, sizeof(*pc)); > + pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2; > + pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL; > + pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL; > + 
pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D; > + pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX; > + pc->dw1.render_target_cache_flush_enable = 1; > + pc->dw1.texture_cache_invalidation_enable = 1; > + pc->dw1.cs_stall = 1; > + ADVANCE_BATCH(gpgpu->batch); > +} > + > +static void > intel_gpgpu_set_L3_gen7(intel_gpgpu_t *gpgpu, uint32_t use_slm) > { > BEGIN_BATCH(gpgpu->batch, 9); > @@ -1925,6 +1956,7 @@ intel_set_gpgpu_callbacks(int device_id) > intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8; > intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8; > cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) > intel_gpgpu_bind_sampler_gen8; > + intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7; > return; > } > > @@ -1943,6 +1975,7 @@ intel_set_gpgpu_callbacks(int device_id) > intel_gpgpu_post_action = intel_gpgpu_post_action_gen75; > intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; //HSW same as ivb > intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen75; > + intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen75; > } > else if (IS_IVYBRIDGE(device_id)) { > cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) > intel_gpgpu_bind_image_gen7; > @@ -1957,5 +1990,6 @@ intel_set_gpgpu_callbacks(int device_id) > intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen7; > intel_gpgpu_post_action = intel_gpgpu_post_action_gen7; > intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen7; > + intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7; > } > } > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
