Re: [Mesa-dev] [PATCH 1/5] intel/blorp: Explicitly flush all allocated state

2017-02-21 Thread Jason Ekstrand
On Tue, Feb 21, 2017 at 4:05 AM, Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> On 20/02/17 19:21, Jason Ekstrand wrote:
>
>> Found by inspection.  However, I expect it fixes real bugs when using
>> blorp from Vulkan on little-core platforms.
>>
>> Cc: "13.0 17.0" 
>> ---
>>   src/intel/blorp/blorp_genX_exec.h   | 17 -
>>   src/intel/vulkan/genX_blorp_exec.c  |  8 
>>   src/mesa/drivers/dri/i965/genX_blorp_exec.c |  8 
>>   3 files changed, 32 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/intel/blorp/blorp_genX_exec.h
>> b/src/intel/blorp/blorp_genX_exec.h
>> index a673ab8..1e6b05c 100644
>> --- a/src/intel/blorp/blorp_genX_exec.h
>> +++ b/src/intel/blorp/blorp_genX_exec.h
>> @@ -66,6 +66,10 @@ blorp_alloc_binding_table(struct blorp_batch *batch,
>> unsigned num_entries,
>> unsigned state_size, unsigned state_alignment,
>> uint32_t *bt_offset, uint32_t
>> *surface_offsets,
>> void **surface_maps);
>> +
>> +static void
>> +blorp_flush_range(struct blorp_batch *batch, void *start, size_t size);
>> +
>>   static void
>>   blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
>>   struct blorp_address address, uint32_t delta);
>> @@ -182,6 +186,7 @@ blorp_emit_vertex_data(struct blorp_batch *batch,
>>  void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices),
>> addr);
>>  memcpy(data, vertices, sizeof(vertices));
>>  *size = sizeof(vertices);
>> +   blorp_flush_range(batch, data, *size);
>>   }
>> static void
>> @@ -199,7 +204,8 @@ blorp_emit_input_varying_data(struct blorp_batch
>> *batch,
>>  *size = 16 + num_varyings * vec4_size_in_bytes;
>>const uint32_t *const inputs_src = (const uint32_t
>> *)>wm_inputs;
>> -   uint32_t *inputs = blorp_alloc_vertex_buffer(batch, *size, addr);
>> +   void *data = blorp_alloc_vertex_buffer(batch, *size, addr);
>> +   uint32_t *inputs = data;
>>/* Copy in the VS inputs */
>>  assert(sizeof(params->vs_inputs) == 16);
>> @@ -223,6 +229,8 @@ blorp_emit_input_varying_data(struct blorp_batch
>> *batch,
>>inputs += 4;
>> }
>>  }
>> +
>> +   blorp_flush_range(batch, data, *size);
>>   }
>> static void
>> @@ -906,6 +914,7 @@ blorp_emit_blend_state(struct blorp_batch *batch,
>>  GENX(BLEND_STATE_length) * 4,
>>  64, );
>>  GENX(BLEND_STATE_pack)(NULL, state, );
>> +   blorp_flush_range(batch, state, GENX(BLEND_STATE_length) * 4);
>> #if GEN_GEN >= 7
>>  blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
>> @@ -940,6 +949,7 @@ blorp_emit_color_calc_state(struct blorp_batch
>> *batch,
>>
>>  GENX(COLOR_CALC_STATE_length) * 4,
>>  64, );
>>  GENX(COLOR_CALC_STATE_pack)(NULL, state, );
>> +   blorp_flush_range(batch, state, GENX(COLOR_CALC_STATE_length) * 4);
>> #if GEN_GEN >= 7
>>  blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
>> @@ -1016,6 +1026,7 @@ blorp_emit_depth_stencil_state(struct blorp_batch
>> *batch,
>>  GENX(DEPTH_STENCIL_STATE_length)
>> * 4,
>>  64, );
>>  GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, );
>> +   blorp_flush_range(batch, state, GENX(DEPTH_STENCIL_STATE_length) *
>> 4);
>>   #endif
>> #if GEN_GEN == 7
>> @@ -1068,6 +1079,8 @@ blorp_emit_surface_state(struct blorp_batch *batch,
>> blorp_surface_reloc(batch, state_offset +
>> isl_dev->ss.aux_addr_offset,
>> surface->aux_addr, *aux_addr);
>>  }
>> +
>> +   blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) *
>> 4);
>>   }
>>
>
> Do we need a flush in blorp_emit_null_surface_state() too?


Yes we do.  I just realized we may have another missing flush as well. :/
I'll send a v3


>
> static void
>> @@ -1181,6 +1194,7 @@ blorp_emit_sampler_state(struct blorp_batch *batch,
>>  GENX(SAMPLER_STATE_length) *
>> 4,
>>  32, );
>>  GENX(SAMPLER_STATE_pack)(NULL, state, );
>> +   blorp_flush_range(batch, state, GENX(SAMPLER_STATE_length) * 4);
>> #if GEN_GEN >= 7
>>  blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {
>> @@ -1333,6 +1347,7 @@ blorp_emit_viewport_state(struct blorp_batch
>> *batch,
>>.MinimumDepth = 0.0,
>>.MaximumDepth = 1.0,
>> });
>> +   blorp_flush_range(batch, state, GENX(CC_VIEWPORT_length) * 4);
>> #if GEN_GEN >= 7
>>  blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
>> diff --git a/src/intel/vulkan/genX_blorp_exec.c
>> b/src/intel/vulkan/genX_blorp_exec.c
>> index 6f0b063..f7969e5 100644
>> --- 

Re: [Mesa-dev] [PATCH 1/5] intel/blorp: Explicitly flush all allocated state

2017-02-21 Thread Lionel Landwerlin

On 20/02/17 19:21, Jason Ekstrand wrote:

Found by inspection.  However, I expect it fixes real bugs when using
blorp from Vulkan on little-core platforms.

Cc: "13.0 17.0" 
---
  src/intel/blorp/blorp_genX_exec.h   | 17 -
  src/intel/vulkan/genX_blorp_exec.c  |  8 
  src/mesa/drivers/dri/i965/genX_blorp_exec.c |  8 
  3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index a673ab8..1e6b05c 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -66,6 +66,10 @@ blorp_alloc_binding_table(struct blorp_batch *batch, 
unsigned num_entries,
unsigned state_size, unsigned state_alignment,
uint32_t *bt_offset, uint32_t *surface_offsets,
void **surface_maps);
+
+static void
+blorp_flush_range(struct blorp_batch *batch, void *start, size_t size);
+
  static void
  blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
  struct blorp_address address, uint32_t delta);
@@ -182,6 +186,7 @@ blorp_emit_vertex_data(struct blorp_batch *batch,
 void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), addr);
 memcpy(data, vertices, sizeof(vertices));
 *size = sizeof(vertices);
+   blorp_flush_range(batch, data, *size);
  }
  
  static void

@@ -199,7 +204,8 @@ blorp_emit_input_varying_data(struct blorp_batch *batch,
 *size = 16 + num_varyings * vec4_size_in_bytes;
  
 const uint32_t *const inputs_src = (const uint32_t *)>wm_inputs;

-   uint32_t *inputs = blorp_alloc_vertex_buffer(batch, *size, addr);
+   void *data = blorp_alloc_vertex_buffer(batch, *size, addr);
+   uint32_t *inputs = data;
  
 /* Copy in the VS inputs */

 assert(sizeof(params->vs_inputs) == 16);
@@ -223,6 +229,8 @@ blorp_emit_input_varying_data(struct blorp_batch *batch,
   inputs += 4;
}
 }
+
+   blorp_flush_range(batch, data, *size);
  }
  
  static void

@@ -906,6 +914,7 @@ blorp_emit_blend_state(struct blorp_batch *batch,
 GENX(BLEND_STATE_length) * 4,
 64, );
 GENX(BLEND_STATE_pack)(NULL, state, );
+   blorp_flush_range(batch, state, GENX(BLEND_STATE_length) * 4);
  
  #if GEN_GEN >= 7

 blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
@@ -940,6 +949,7 @@ blorp_emit_color_calc_state(struct blorp_batch *batch,
 GENX(COLOR_CALC_STATE_length) * 4,
 64, );
 GENX(COLOR_CALC_STATE_pack)(NULL, state, );
+   blorp_flush_range(batch, state, GENX(COLOR_CALC_STATE_length) * 4);
  
  #if GEN_GEN >= 7

 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
@@ -1016,6 +1026,7 @@ blorp_emit_depth_stencil_state(struct blorp_batch *batch,
 GENX(DEPTH_STENCIL_STATE_length) * 
4,
 64, );
 GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, );
+   blorp_flush_range(batch, state, GENX(DEPTH_STENCIL_STATE_length) * 4);
  #endif
  
  #if GEN_GEN == 7

@@ -1068,6 +1079,8 @@ blorp_emit_surface_state(struct blorp_batch *batch,
blorp_surface_reloc(batch, state_offset + isl_dev->ss.aux_addr_offset,
surface->aux_addr, *aux_addr);
 }
+
+   blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4);
  }


Do we need a flush in blorp_emit_null_surface_state() too?

  
  static void

@@ -1181,6 +1194,7 @@ blorp_emit_sampler_state(struct blorp_batch *batch,
 GENX(SAMPLER_STATE_length) * 4,
 32, );
 GENX(SAMPLER_STATE_pack)(NULL, state, );
+   blorp_flush_range(batch, state, GENX(SAMPLER_STATE_length) * 4);
  
  #if GEN_GEN >= 7

 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {
@@ -1333,6 +1347,7 @@ blorp_emit_viewport_state(struct blorp_batch *batch,
   .MinimumDepth = 0.0,
   .MaximumDepth = 1.0,
});
+   blorp_flush_range(batch, state, GENX(CC_VIEWPORT_length) * 4);
  
  #if GEN_GEN >= 7

 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
diff --git a/src/intel/vulkan/genX_blorp_exec.c 
b/src/intel/vulkan/genX_blorp_exec.c
index 6f0b063..f7969e5 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -119,6 +119,14 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, 
uint32_t size,
  }
  
  static void

+blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
+{
+   struct anv_device *device = batch->blorp->driver_ctx;
+   if (device->info.has_llc)
+  anv_clflush_range(start, size);
+}
+
+static void
  blorp_emit_urb_config(struct blorp_batch *batch, unsigned 

[Mesa-dev] [PATCH 1/5] intel/blorp: Explicitly flush all allocated state

2017-02-20 Thread Jason Ekstrand
Found by inspection.  However, I expect it fixes real bugs when using
blorp from Vulkan on little-core platforms.

Cc: "13.0 17.0" 
---
 src/intel/blorp/blorp_genX_exec.h   | 17 -
 src/intel/vulkan/genX_blorp_exec.c  |  8 
 src/mesa/drivers/dri/i965/genX_blorp_exec.c |  8 
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index a673ab8..1e6b05c 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -66,6 +66,10 @@ blorp_alloc_binding_table(struct blorp_batch *batch, 
unsigned num_entries,
   unsigned state_size, unsigned state_alignment,
   uint32_t *bt_offset, uint32_t *surface_offsets,
   void **surface_maps);
+
+static void
+blorp_flush_range(struct blorp_batch *batch, void *start, size_t size);
+
 static void
 blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
 struct blorp_address address, uint32_t delta);
@@ -182,6 +186,7 @@ blorp_emit_vertex_data(struct blorp_batch *batch,
void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), addr);
memcpy(data, vertices, sizeof(vertices));
*size = sizeof(vertices);
+   blorp_flush_range(batch, data, *size);
 }
 
 static void
@@ -199,7 +204,8 @@ blorp_emit_input_varying_data(struct blorp_batch *batch,
*size = 16 + num_varyings * vec4_size_in_bytes;
 
const uint32_t *const inputs_src = (const uint32_t *)>wm_inputs;
-   uint32_t *inputs = blorp_alloc_vertex_buffer(batch, *size, addr);
+   void *data = blorp_alloc_vertex_buffer(batch, *size, addr);
+   uint32_t *inputs = data;
 
/* Copy in the VS inputs */
assert(sizeof(params->vs_inputs) == 16);
@@ -223,6 +229,8 @@ blorp_emit_input_varying_data(struct blorp_batch *batch,
  inputs += 4;
   }
}
+
+   blorp_flush_range(batch, data, *size);
 }
 
 static void
@@ -906,6 +914,7 @@ blorp_emit_blend_state(struct blorp_batch *batch,
GENX(BLEND_STATE_length) * 4,
64, );
GENX(BLEND_STATE_pack)(NULL, state, );
+   blorp_flush_range(batch, state, GENX(BLEND_STATE_length) * 4);
 
 #if GEN_GEN >= 7
blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
@@ -940,6 +949,7 @@ blorp_emit_color_calc_state(struct blorp_batch *batch,
GENX(COLOR_CALC_STATE_length) * 4,
64, );
GENX(COLOR_CALC_STATE_pack)(NULL, state, );
+   blorp_flush_range(batch, state, GENX(COLOR_CALC_STATE_length) * 4);
 
 #if GEN_GEN >= 7
blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
@@ -1016,6 +1026,7 @@ blorp_emit_depth_stencil_state(struct blorp_batch *batch,
GENX(DEPTH_STENCIL_STATE_length) * 
4,
64, );
GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, );
+   blorp_flush_range(batch, state, GENX(DEPTH_STENCIL_STATE_length) * 4);
 #endif
 
 #if GEN_GEN == 7
@@ -1068,6 +1079,8 @@ blorp_emit_surface_state(struct blorp_batch *batch,
   blorp_surface_reloc(batch, state_offset + isl_dev->ss.aux_addr_offset,
   surface->aux_addr, *aux_addr);
}
+
+   blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4);
 }
 
 static void
@@ -1181,6 +1194,7 @@ blorp_emit_sampler_state(struct blorp_batch *batch,
GENX(SAMPLER_STATE_length) * 4,
32, );
GENX(SAMPLER_STATE_pack)(NULL, state, );
+   blorp_flush_range(batch, state, GENX(SAMPLER_STATE_length) * 4);
 
 #if GEN_GEN >= 7
blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {
@@ -1333,6 +1347,7 @@ blorp_emit_viewport_state(struct blorp_batch *batch,
  .MinimumDepth = 0.0,
  .MaximumDepth = 1.0,
   });
+   blorp_flush_range(batch, state, GENX(CC_VIEWPORT_length) * 4);
 
 #if GEN_GEN >= 7
blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
diff --git a/src/intel/vulkan/genX_blorp_exec.c 
b/src/intel/vulkan/genX_blorp_exec.c
index 6f0b063..f7969e5 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -119,6 +119,14 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, 
uint32_t size,
 }
 
 static void
+blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
+{
+   struct anv_device *device = batch->blorp->driver_ctx;
+   if (device->info.has_llc)
+  anv_clflush_range(start, size);
+}
+
+static void
 blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size)
 {
struct anv_device *device = batch->blorp->driver_ctx;
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c 
b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index