Re: [Mesa-dev] [PATCH 30/37] i965/gen6/gs: Buffer PSIZ/flags vertex data in gen6_gs_visitor

2014-09-18 Thread Jordan Justen
Reviewed-by: Jordan Justen jordan.l.jus...@intel.com

On Thu, Aug 14, 2014 at 4:12 AM, Iago Toral Quiroga ito...@igalia.com wrote:
 From: Samuel Iglesias Gonsalvez sigles...@igalia.com

 Since geometry shaders can alter the value of varyings packed in the first
 output VUE slot (PSIZ), we need to buffer it together with all the other
 vertex data so we can emit the right value for each vertex when we do the
 URB writes.

 This fixes the following piglit test in gen6:
 tests/spec/glsl-1.50/execution/redeclare-pervertex-out-subset-gs.shader_test

 Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
 ---
  src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 79 
 ++-
  1 file changed, 41 insertions(+), 38 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 index b8eaa58..fca7536 100644
 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 @@ -178,16 +178,33 @@ gen6_gs_visitor::visit(ir_emit_vertex *)

/* Buffer all output slots for this vertex in vertex_output */
for (int slot = 0; slot  prog_data-vue_map.num_slots; ++slot) {
 - /* We will handle PSIZ for each vertex at thread end time since it
 -  * is not computed by the GS algorithm and requires specific 
 handling.
 -  */
   int varying = prog_data-vue_map.slot_to_varying[slot];
   if (varying != VARYING_SLOT_PSIZ) {
  dst_reg dst(this-vertex_output);
  dst.reladdr = ralloc(mem_ctx, src_reg);
  memcpy(dst.reladdr, this-vertex_output_offset, 
 sizeof(src_reg));
  emit_urb_slot(dst, varying);
 + } else {
 +/* The PSIZ slot can pack multiple varyings in different channels
 + * and emit_urb_slot() will produce a MOV instruction for each of
 + * them. Since we are writing to an array, that will translate to
 + * possibly multiple MOV instructions with an array destination 
 and
 + * each will generate a scratch write with the same offset into
 + * scratch space (thus, each one overwriting the previous). This 
 is
 + * not what we want. What we will do instead is emit PSIZ to a
 + * a regular temporary register, then move that resgister into 
 the
 + * array. This way we only have one instruction with an array
 + * destination and we only produce a single scratch write.
 + */
 +dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
 +emit_urb_slot(tmp, varying);
 +dst_reg dst(this-vertex_output);
 +dst.reladdr = ralloc(mem_ctx, src_reg);
 +memcpy(dst.reladdr, this-vertex_output_offset, 
 sizeof(src_reg));
 +vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
 +inst-force_writemask_all = true;
   }
 +
   emit(ADD(dst_reg(this-vertex_output_offset),
this-vertex_output_offset, 1u));
}
 @@ -427,17 +444,12 @@ gen6_gs_visitor::emit_thread_end()
 memcpy(data.reladdr, this-vertex_output_offset,
sizeof(src_reg));

 -   if (varying == VARYING_SLOT_PSIZ) {
 -  /* We did not buffer PSIZ, emit it directly here */
 -  emit_urb_slot(dst_reg(MRF, mrf), varying);
 -   } else {
 -  /* Copy this slot to the appropriate message register */
 -  dst_reg reg = dst_reg(MRF, mrf);
 -  reg.type = output_reg[varying].type;
 -  data.type = reg.type;
 -  vec4_instruction *inst = emit(MOV(reg, data));
 -  inst-force_writemask_all = true;
 -   }
 +   /* Copy this slot to the appropriate message register */
 +   dst_reg reg = dst_reg(MRF, mrf);
 +   reg.type = output_reg[varying].type;
 +   data.type = reg.type;
 +   vec4_instruction *inst = emit(MOV(reg, data));
 +   inst-force_writemask_all = true;

 mrf++;
 emit(ADD(dst_reg(this-vertex_output_offset),
 @@ -585,22 +597,19 @@ gen6_gs_visitor::xfb_buffer_output()
 /* Buffer all TF outputs for this vertex in xfb_output */
 for (int binding = 0; binding  
 prog_data-num_transform_feedback_bindings;
  binding++) {
 -  /* We will handle PSIZ for each vertex at thread end time since it
 -   * is not computed by the GS algorithm and requires specific handling.
 -   */
unsigned varying =
   prog_data-transform_feedback_bindings[binding];
 -  if (varying != VARYING_SLOT_PSIZ) {
 - dst_reg dst(this-xfb_output);
 - dst.reladdr = ralloc(mem_ctx, src_reg);
 - memcpy(dst.reladdr, this-xfb_output_offset, sizeof(src_reg));
 - dst.type = 

[Mesa-dev] [PATCH 30/37] i965/gen6/gs: Buffer PSIZ/flags vertex data in gen6_gs_visitor

2014-08-14 Thread Iago Toral Quiroga
From: Samuel Iglesias Gonsalvez sigles...@igalia.com

Since geometry shaders can alter the value of varyings packed in the first
output VUE slot (PSIZ), we need to buffer it together with all the other
vertex data so we can emit the right value for each vertex when we do the
URB writes.

This fixes the following piglit test in gen6:
tests/spec/glsl-1.50/execution/redeclare-pervertex-out-subset-gs.shader_test

Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
---
 src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 79 ++-
 1 file changed, 41 insertions(+), 38 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp 
b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index b8eaa58..fca7536 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -178,16 +178,33 @@ gen6_gs_visitor::visit(ir_emit_vertex *)
 
   /* Buffer all output slots for this vertex in vertex_output */
   for (int slot = 0; slot  prog_data-vue_map.num_slots; ++slot) {
- /* We will handle PSIZ for each vertex at thread end time since it
-  * is not computed by the GS algorithm and requires specific handling.
-  */
  int varying = prog_data-vue_map.slot_to_varying[slot];
  if (varying != VARYING_SLOT_PSIZ) {
 dst_reg dst(this-vertex_output);
 dst.reladdr = ralloc(mem_ctx, src_reg);
 memcpy(dst.reladdr, this-vertex_output_offset, sizeof(src_reg));
 emit_urb_slot(dst, varying);
+ } else {
+/* The PSIZ slot can pack multiple varyings in different channels
+ * and emit_urb_slot() will produce a MOV instruction for each of
+ * them. Since we are writing to an array, that will translate to
+ * possibly multiple MOV instructions with an array destination and
+ * each will generate a scratch write with the same offset into
+ * scratch space (thus, each one overwriting the previous). This is
+ * not what we want. What we will do instead is emit PSIZ to a
+ * a regular temporary register, then move that resgister into the
+ * array. This way we only have one instruction with an array
+ * destination and we only produce a single scratch write.
+ */
+dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
+emit_urb_slot(tmp, varying);
+dst_reg dst(this-vertex_output);
+dst.reladdr = ralloc(mem_ctx, src_reg);
+memcpy(dst.reladdr, this-vertex_output_offset, sizeof(src_reg));
+vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
+inst-force_writemask_all = true;
  }
+
  emit(ADD(dst_reg(this-vertex_output_offset),
   this-vertex_output_offset, 1u));
   }
@@ -427,17 +444,12 @@ gen6_gs_visitor::emit_thread_end()
memcpy(data.reladdr, this-vertex_output_offset,
   sizeof(src_reg));
 
-   if (varying == VARYING_SLOT_PSIZ) {
-  /* We did not buffer PSIZ, emit it directly here */
-  emit_urb_slot(dst_reg(MRF, mrf), varying);
-   } else {
-  /* Copy this slot to the appropriate message register */
-  dst_reg reg = dst_reg(MRF, mrf);
-  reg.type = output_reg[varying].type;
-  data.type = reg.type;
-  vec4_instruction *inst = emit(MOV(reg, data));
-  inst-force_writemask_all = true;
-   }
+   /* Copy this slot to the appropriate message register */
+   dst_reg reg = dst_reg(MRF, mrf);
+   reg.type = output_reg[varying].type;
+   data.type = reg.type;
+   vec4_instruction *inst = emit(MOV(reg, data));
+   inst-force_writemask_all = true;
 
mrf++;
emit(ADD(dst_reg(this-vertex_output_offset),
@@ -585,22 +597,19 @@ gen6_gs_visitor::xfb_buffer_output()
/* Buffer all TF outputs for this vertex in xfb_output */
for (int binding = 0; binding  prog_data-num_transform_feedback_bindings;
 binding++) {
-  /* We will handle PSIZ for each vertex at thread end time since it
-   * is not computed by the GS algorithm and requires specific handling.
-   */
   unsigned varying =
  prog_data-transform_feedback_bindings[binding];
-  if (varying != VARYING_SLOT_PSIZ) {
- dst_reg dst(this-xfb_output);
- dst.reladdr = ralloc(mem_ctx, src_reg);
- memcpy(dst.reladdr, this-xfb_output_offset, sizeof(src_reg));
- dst.type = output_reg[varying].type;
+  dst_reg dst(this-xfb_output);
+  dst.reladdr = ralloc(mem_ctx, src_reg);
+  memcpy(dst.reladdr, this-xfb_output_offset, sizeof(src_reg));
+  dst.type = output_reg[varying].type;
+
+