Module: Mesa
Branch: master
Commit: 695a4cb0f6120463d16d3e0ef747675a83b769ea
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=695a4cb0f6120463d16d3e0ef747675a83b769ea

Author: Gert Wollny <[email protected]>
Date:   Thu Jul  5 19:11:22 2018 +0200

r600: Add spill output to group only if register or target index changes

The current spill code checks in each instruction of an instruction group 
whether
spilling is needed and if so, it adds spilling for each component as a seperate
instruction and it allocates a new temporary for each component and since it 
takes
the write mask from the TGSI representation, all components might be written
each time and as a result already written components might be overwritten with
garbage like:

   ...
   y: MOV                R9.y,  [0x42140000 37].x
   t: MOV                R8.x,  [0x42040000 33].y
   ...
   MEM_SCRATCH  WRITE_IND_ACK 0     R9.xy__, @R4.x  ES:3
   MEM_SCRATCH  WRITE_IND_ACK 0     R8.xy__, @R4.x  ES:3
   ...

To resolve this isse accumulate spills to the same memory location so that only 
one
memory write instruction is emitted for an instruction group that writes up to 
all
four components.

This fixes updated piglits (see 
https://patchwork.freedesktop.org/series/46064/):
   spec/glsl-1.30/execution
       fs-large-local-array-vec2.shader_test
       fs-large-local-array-vec3.shader_test
       fs-large-local-array-vec4.shader_test

v2: fix some typos and add comment about piglits (Roland Scheidegger)

Signed-off-by: Gert Wollny <[email protected]>
Reviewed-by: Roland Scheidegger <[email protected]> (v1)

---

 src/gallium/drivers/r600/r600_shader.c | 69 ++++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index c466a48262..4b91dacccc 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4339,8 +4339,32 @@ static void tgsi_dst(struct r600_shader_ctx *ctx,
 
                if (spilled) {
                        struct r600_bytecode_output cf;
-                       int reg = r600_get_temp(ctx);
+                       int reg = 0;
                        int r;
+                       bool add_pending_output = true;
+
+                       memset(&cf, 0, sizeof(struct r600_bytecode_output));
+                       get_spilled_array_base_and_size(ctx, 
tgsi_dst->Register.Index,
+                               &cf.array_base, &cf.array_size);
+
+                       /* If no component has spilled, reserve a register and 
add the spill code
+                        *  ctx->bc->n_pending_outputs is cleared after each 
instruction group */
+                       if (ctx->bc->n_pending_outputs == 0) {
+                               reg = r600_get_temp(ctx);
+                       } else {
+                               /* If we are already spilling and the output 
address is the same like
+                               * before then just reuse the same slot */
+                               struct r600_bytecode_output *tmpl = 
&ctx->bc->pending_outputs[ctx->bc->n_pending_outputs-1];
+                               if ((cf.array_base + idx == tmpl->array_base) ||
+                                   (cf.array_base == tmpl->array_base &&
+                                    tmpl->index_gpr == ctx->bc->ar_reg &&
+                                    tgsi_dst->Register.Indirect)) {
+                                       reg = ctx->bc->pending_outputs[0].gpr;
+                                       add_pending_output = false;
+                               } else {
+                                       reg = r600_get_temp(ctx);
+                               }
+                       }
 
                        r600_dst->sel = reg;
                        r600_dst->chan = swizzle;
@@ -4349,29 +4373,26 @@ static void tgsi_dst(struct r600_shader_ctx *ctx,
                                r600_dst->clamp = 1;
                        }
 
-                       // needs to be added after op using tgsi_dst
-                       memset(&cf, 0, sizeof(struct r600_bytecode_output));
-                       cf.op = CF_OP_MEM_SCRATCH;
-                       cf.elem_size = 3;
-                       cf.gpr = reg;
-                       cf.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
-                       cf.mark = 1;
-                       cf.comp_mask = inst->Dst[0].Register.WriteMask;
-                       cf.swizzle_x = 0;
-                       cf.swizzle_y = 1;
-                       cf.swizzle_z = 2;
-                       cf.swizzle_w = 3;
-                       cf.burst_count = 1;
-
-                       get_spilled_array_base_and_size(ctx, 
tgsi_dst->Register.Index,
-                               &cf.array_base, &cf.array_size);
+                       /* Add new outputs as pending */
+                       if (add_pending_output) {
+                               cf.op = CF_OP_MEM_SCRATCH;
+                               cf.elem_size = 3;
+                               cf.gpr = reg;
+                               cf.type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
+                               cf.mark = 1;
+                               cf.comp_mask = inst->Dst[0].Register.WriteMask;
+                               cf.swizzle_x = 0;
+                               cf.swizzle_y = 1;
+                               cf.swizzle_z = 2;
+                               cf.swizzle_w = 3;
+                               cf.burst_count = 1;
 
-                       if (tgsi_dst->Register.Indirect) {
-                               if (ctx->bc->chip_class < R700)
-                                       cf.type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
-                               else
-                                       cf.type = 3; // 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK;
-                               cf.index_gpr = ctx->bc->ar_reg;
+                               if (tgsi_dst->Register.Indirect) {
+                                       if (ctx->bc->chip_class < R700)
+                                               cf.type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
+                                       else
+                                               cf.type = 3; // 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK;
+                                       cf.index_gpr = ctx->bc->ar_reg;
                        }
                        else {
                                cf.array_base += idx;
@@ -4384,7 +4405,7 @@ static void tgsi_dst(struct r600_shader_ctx *ctx,
 
                        if (ctx->bc->chip_class >= R700)
                                r600_bytecode_need_wait_ack(ctx->bc, true);
-
+                       }
                        return;
                }
                else {

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to