Re: [Mesa-dev] [PATCH 1/5] intel/blorp: Explicitly flush all allocated state
On Tue, Feb 21, 2017 at 4:05 AM, Lionel Landwerlin < lionel.g.landwer...@intel.com> wrote: > On 20/02/17 19:21, Jason Ekstrand wrote: > >> Found by inspection. However, I expect it fixes real bugs when using >> blorp from Vulkan on little-core platforms. >> >> Cc: "13.0 17.0">> --- >> src/intel/blorp/blorp_genX_exec.h | 17 - >> src/intel/vulkan/genX_blorp_exec.c | 8 >> src/mesa/drivers/dri/i965/genX_blorp_exec.c | 8 >> 3 files changed, 32 insertions(+), 1 deletion(-) >> >> diff --git a/src/intel/blorp/blorp_genX_exec.h >> b/src/intel/blorp/blorp_genX_exec.h >> index a673ab8..1e6b05c 100644 >> --- a/src/intel/blorp/blorp_genX_exec.h >> +++ b/src/intel/blorp/blorp_genX_exec.h >> @@ -66,6 +66,10 @@ blorp_alloc_binding_table(struct blorp_batch *batch, >> unsigned num_entries, >> unsigned state_size, unsigned state_alignment, >> uint32_t *bt_offset, uint32_t >> *surface_offsets, >> void **surface_maps); >> + >> +static void >> +blorp_flush_range(struct blorp_batch *batch, void *start, size_t size); >> + >> static void >> blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, >> struct blorp_address address, uint32_t delta); >> @@ -182,6 +186,7 @@ blorp_emit_vertex_data(struct blorp_batch *batch, >> void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), >> addr); >> memcpy(data, vertices, sizeof(vertices)); >> *size = sizeof(vertices); >> + blorp_flush_range(batch, data, *size); >> } >> static void >> @@ -199,7 +204,8 @@ blorp_emit_input_varying_data(struct blorp_batch >> *batch, >> *size = 16 + num_varyings * vec4_size_in_bytes; >>const uint32_t *const inputs_src = (const uint32_t >> *)>wm_inputs; >> - uint32_t *inputs = blorp_alloc_vertex_buffer(batch, *size, addr); >> + void *data = blorp_alloc_vertex_buffer(batch, *size, addr); >> + uint32_t *inputs = data; >>/* Copy in the VS inputs */ >> assert(sizeof(params->vs_inputs) == 16); >> @@ -223,6 +229,8 @@ blorp_emit_input_varying_data(struct blorp_batch >> *batch, >>inputs += 4; >> } >> } >> + >> + blorp_flush_range(batch, data, *size); >> } >> static void >> @@ -906,6 +914,7 @@ blorp_emit_blend_state(struct blorp_batch *batch, >> GENX(BLEND_STATE_length) * 4, >> 64, ); >> GENX(BLEND_STATE_pack)(NULL, state, ); >> + blorp_flush_range(batch, state, GENX(BLEND_STATE_length) * 4); >> #if GEN_GEN >= 7 >> blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) { >> @@ -940,6 +949,7 @@ blorp_emit_color_calc_state(struct blorp_batch >> *batch, >> >> GENX(COLOR_CALC_STATE_length) * 4, >> 64, ); >> GENX(COLOR_CALC_STATE_pack)(NULL, state, ); >> + blorp_flush_range(batch, state, GENX(COLOR_CALC_STATE_length) * 4); >> #if GEN_GEN >= 7 >> blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) { >> @@ -1016,6 +1026,7 @@ blorp_emit_depth_stencil_state(struct blorp_batch >> *batch, >> GENX(DEPTH_STENCIL_STATE_length) >> * 4, >> 64, ); >> GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, ); >> + blorp_flush_range(batch, state, GENX(DEPTH_STENCIL_STATE_length) * >> 4); >> #endif >> #if GEN_GEN == 7 >> @@ -1068,6 +1079,8 @@ blorp_emit_surface_state(struct blorp_batch *batch, >> blorp_surface_reloc(batch, state_offset + >> isl_dev->ss.aux_addr_offset, >> surface->aux_addr, *aux_addr); >> } >> + >> + blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * >> 4); >> } >> > > Do we need a flush in blorp_emit_null_surface_state() too? Yes we do. I just realized we may have another missing flush as well. :/ I'll send a v3 > > static void >> @@ -1181,6 +1194,7 @@ blorp_emit_sampler_state(struct blorp_batch *batch, >> GENX(SAMPLER_STATE_length) * >> 4, >> 32, ); >> GENX(SAMPLER_STATE_pack)(NULL, state, ); >> + blorp_flush_range(batch, state, GENX(SAMPLER_STATE_length) * 4); >> #if GEN_GEN >= 7 >> blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) { >> @@ -1333,6 +1347,7 @@ blorp_emit_viewport_state(struct blorp_batch >> *batch, >>.MinimumDepth = 0.0, >>.MaximumDepth = 1.0, >> }); >> + blorp_flush_range(batch, state, GENX(CC_VIEWPORT_length) * 4); >> #if GEN_GEN >= 7 >> blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) { >> diff --git a/src/intel/vulkan/genX_blorp_exec.c >> b/src/intel/vulkan/genX_blorp_exec.c >> index 6f0b063..f7969e5 100644 >> ---
Re: [Mesa-dev] [PATCH 1/5] intel/blorp: Explicitly flush all allocated state
On 20/02/17 19:21, Jason Ekstrand wrote: Found by inspection. However, I expect it fixes real bugs when using blorp from Vulkan on little-core platforms. Cc: "13.0 17.0"--- src/intel/blorp/blorp_genX_exec.h | 17 - src/intel/vulkan/genX_blorp_exec.c | 8 src/mesa/drivers/dri/i965/genX_blorp_exec.c | 8 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index a673ab8..1e6b05c 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -66,6 +66,10 @@ blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries, unsigned state_size, unsigned state_alignment, uint32_t *bt_offset, uint32_t *surface_offsets, void **surface_maps); + +static void +blorp_flush_range(struct blorp_batch *batch, void *start, size_t size); + static void blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, struct blorp_address address, uint32_t delta); @@ -182,6 +186,7 @@ blorp_emit_vertex_data(struct blorp_batch *batch, void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), addr); memcpy(data, vertices, sizeof(vertices)); *size = sizeof(vertices); + blorp_flush_range(batch, data, *size); } static void @@ -199,7 +204,8 @@ blorp_emit_input_varying_data(struct blorp_batch *batch, *size = 16 + num_varyings * vec4_size_in_bytes; const uint32_t *const inputs_src = (const uint32_t *)>wm_inputs; - uint32_t *inputs = blorp_alloc_vertex_buffer(batch, *size, addr); + void *data = blorp_alloc_vertex_buffer(batch, *size, addr); + uint32_t *inputs = data; /* Copy in the VS inputs */ assert(sizeof(params->vs_inputs) == 16); @@ -223,6 +229,8 @@ blorp_emit_input_varying_data(struct blorp_batch *batch, inputs += 4; } } + + blorp_flush_range(batch, data, *size); } static void @@ -906,6 +914,7 @@ blorp_emit_blend_state(struct blorp_batch *batch, GENX(BLEND_STATE_length) * 4, 64, ); GENX(BLEND_STATE_pack)(NULL, state, ); + blorp_flush_range(batch, state, GENX(BLEND_STATE_length) * 4); #if GEN_GEN >= 7 blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) { @@ -940,6 +949,7 @@ blorp_emit_color_calc_state(struct blorp_batch *batch, GENX(COLOR_CALC_STATE_length) * 4, 64, ); GENX(COLOR_CALC_STATE_pack)(NULL, state, ); + blorp_flush_range(batch, state, GENX(COLOR_CALC_STATE_length) * 4); #if GEN_GEN >= 7 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) { @@ -1016,6 +1026,7 @@ blorp_emit_depth_stencil_state(struct blorp_batch *batch, GENX(DEPTH_STENCIL_STATE_length) * 4, 64, ); GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, ); + blorp_flush_range(batch, state, GENX(DEPTH_STENCIL_STATE_length) * 4); #endif #if GEN_GEN == 7 @@ -1068,6 +1079,8 @@ blorp_emit_surface_state(struct blorp_batch *batch, blorp_surface_reloc(batch, state_offset + isl_dev->ss.aux_addr_offset, surface->aux_addr, *aux_addr); } + + blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4); } Do we need a flush in blorp_emit_null_surface_state() too? static void @@ -1181,6 +1194,7 @@ blorp_emit_sampler_state(struct blorp_batch *batch, GENX(SAMPLER_STATE_length) * 4, 32, ); GENX(SAMPLER_STATE_pack)(NULL, state, ); + blorp_flush_range(batch, state, GENX(SAMPLER_STATE_length) * 4); #if GEN_GEN >= 7 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) { @@ -1333,6 +1347,7 @@ blorp_emit_viewport_state(struct blorp_batch *batch, .MinimumDepth = 0.0, .MaximumDepth = 1.0, }); + blorp_flush_range(batch, state, GENX(CC_VIEWPORT_length) * 4); #if GEN_GEN >= 7 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) { diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 6f0b063..f7969e5 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -119,6 +119,14 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size, } static void +blorp_flush_range(struct blorp_batch *batch, void *start, size_t size) +{ + struct anv_device *device = batch->blorp->driver_ctx; + if (device->info.has_llc) + anv_clflush_range(start, size); +} + +static void blorp_emit_urb_config(struct blorp_batch *batch, unsigned
[Mesa-dev] [PATCH 1/5] intel/blorp: Explicitly flush all allocated state
Found by inspection. However, I expect it fixes real bugs when using blorp from Vulkan on little-core platforms. Cc: "13.0 17.0"--- src/intel/blorp/blorp_genX_exec.h | 17 - src/intel/vulkan/genX_blorp_exec.c | 8 src/mesa/drivers/dri/i965/genX_blorp_exec.c | 8 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index a673ab8..1e6b05c 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -66,6 +66,10 @@ blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries, unsigned state_size, unsigned state_alignment, uint32_t *bt_offset, uint32_t *surface_offsets, void **surface_maps); + +static void +blorp_flush_range(struct blorp_batch *batch, void *start, size_t size); + static void blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, struct blorp_address address, uint32_t delta); @@ -182,6 +186,7 @@ blorp_emit_vertex_data(struct blorp_batch *batch, void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), addr); memcpy(data, vertices, sizeof(vertices)); *size = sizeof(vertices); + blorp_flush_range(batch, data, *size); } static void @@ -199,7 +204,8 @@ blorp_emit_input_varying_data(struct blorp_batch *batch, *size = 16 + num_varyings * vec4_size_in_bytes; const uint32_t *const inputs_src = (const uint32_t *)>wm_inputs; - uint32_t *inputs = blorp_alloc_vertex_buffer(batch, *size, addr); + void *data = blorp_alloc_vertex_buffer(batch, *size, addr); + uint32_t *inputs = data; /* Copy in the VS inputs */ assert(sizeof(params->vs_inputs) == 16); @@ -223,6 +229,8 @@ blorp_emit_input_varying_data(struct blorp_batch *batch, inputs += 4; } } + + blorp_flush_range(batch, data, *size); } static void @@ -906,6 +914,7 @@ blorp_emit_blend_state(struct blorp_batch *batch, GENX(BLEND_STATE_length) * 4, 64, ); GENX(BLEND_STATE_pack)(NULL, state, ); + blorp_flush_range(batch, state, GENX(BLEND_STATE_length) * 4); #if GEN_GEN >= 7 blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) { @@ -940,6 +949,7 @@ blorp_emit_color_calc_state(struct blorp_batch *batch, GENX(COLOR_CALC_STATE_length) * 4, 64, ); GENX(COLOR_CALC_STATE_pack)(NULL, state, ); + blorp_flush_range(batch, state, GENX(COLOR_CALC_STATE_length) * 4); #if GEN_GEN >= 7 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) { @@ -1016,6 +1026,7 @@ blorp_emit_depth_stencil_state(struct blorp_batch *batch, GENX(DEPTH_STENCIL_STATE_length) * 4, 64, ); GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, ); + blorp_flush_range(batch, state, GENX(DEPTH_STENCIL_STATE_length) * 4); #endif #if GEN_GEN == 7 @@ -1068,6 +1079,8 @@ blorp_emit_surface_state(struct blorp_batch *batch, blorp_surface_reloc(batch, state_offset + isl_dev->ss.aux_addr_offset, surface->aux_addr, *aux_addr); } + + blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4); } static void @@ -1181,6 +1194,7 @@ blorp_emit_sampler_state(struct blorp_batch *batch, GENX(SAMPLER_STATE_length) * 4, 32, ); GENX(SAMPLER_STATE_pack)(NULL, state, ); + blorp_flush_range(batch, state, GENX(SAMPLER_STATE_length) * 4); #if GEN_GEN >= 7 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) { @@ -1333,6 +1347,7 @@ blorp_emit_viewport_state(struct blorp_batch *batch, .MinimumDepth = 0.0, .MaximumDepth = 1.0, }); + blorp_flush_range(batch, state, GENX(CC_VIEWPORT_length) * 4); #if GEN_GEN >= 7 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) { diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 6f0b063..f7969e5 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -119,6 +119,14 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size, } static void +blorp_flush_range(struct blorp_batch *batch, void *start, size_t size) +{ + struct anv_device *device = batch->blorp->driver_ctx; + if (device->info.has_llc) + anv_clflush_range(start, size); +} + +static void blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size) { struct anv_device *device = batch->blorp->driver_ctx; diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index