Module: Mesa
Branch: master
Commit: 42cd2810aa30dfd825d46a305134fbc8f239247c
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=42cd2810aa30dfd825d46a305134fbc8f239247c

Author: Gert Wollny <gw.foss...@gmail.com>
Date:   Sat May 26 18:48:31 2018 +0200

r600: Correct IDIV if DST and SRC use the same temporary

In cases like

  IDIV TEMP[0].xy TEMP[0].xx TEMP[1].yy

the result will be written to the same register that is also a source register.
Since the components are evaluated one by one, this may result in overwriting
the source value for a later operation. Work around this by adding another
temporary to store the result if the destination temporary index is equal to
one of the source temporary indices.

Fixes:
  dEQP-GLES2.functional.shaders.operator.binary_operator.div.*
Signed-off-by: Gert Wollny <gw.foss...@gmail.com>
Reviewed-by: Dave Airlie <airl...@redhat.com>

---

 src/gallium/drivers/r600/r600_shader.c | 52 ++++++++++++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index bd511c76ac..da89bb28e7 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5727,10 +5727,19 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int 
mod, int signed_op)
        struct r600_bytecode_alu alu;
        int i, r, j;
        unsigned write_mask = inst->Dst[0].Register.WriteMask;
+       int lasti = tgsi_last_instruction(write_mask);
        int tmp0 = ctx->temp_reg;
        int tmp1 = r600_get_temp(ctx);
        int tmp2 = r600_get_temp(ctx);
        int tmp3 = r600_get_temp(ctx);
+       int tmp4 = 0;
+
+       /* Use additional temp if dst register and src register are the same */
+       if (inst->Src[0].Register.Index == inst->Dst[0].Register.Index ||
+           inst->Src[1].Register.Index == inst->Dst[0].Register.Index) {
+               tmp4 = r600_get_temp(ctx);
+       }
+
        /* Unsigned path:
         *
         * we need to represent src1 as src2*q + r, where q - quotient, r - 
remainder
@@ -6345,7 +6354,13 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int 
mod, int signed_op)
                        alu.dst.chan = 2;
                        alu.dst.write = 1;
                } else {
-                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                       if (tmp4 > 0) {
+                               alu.dst.sel = tmp4;
+                               alu.dst.chan = i;
+                               alu.dst.write = 1;
+                       } else {
+                               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                       }
                }
 
                alu.src[0].sel = tmp1;
@@ -6387,7 +6402,13 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int 
mod, int signed_op)
                                alu.op = ALU_OP3_CNDGE_INT;
                                alu.is_op3 = 1;
 
-                               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                               if (tmp4 > 0) {
+                                       alu.dst.sel = tmp4;
+                                       alu.dst.chan = i;
+                                       alu.dst.write = 1;
+                               } else {
+                                       tgsi_dst(ctx, &inst->Dst[0], i, 
&alu.dst);
+                               }
 
                                r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
                                alu.src[1].sel = tmp0;
@@ -6423,7 +6444,13 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int 
mod, int signed_op)
                                alu.op = ALU_OP3_CNDGE_INT;
                                alu.is_op3 = 1;
 
-                               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                               if (tmp4 > 0) {
+                                       alu.dst.sel = tmp4;
+                                       alu.dst.chan = i;
+                                       alu.dst.write = 1;
+                               } else {
+                                       tgsi_dst(ctx, &inst->Dst[0], i, 
&alu.dst);
+                               }
 
                                alu.src[0].sel = tmp2;
                                alu.src[0].chan = 2;
@@ -6438,6 +6465,25 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int 
mod, int signed_op)
                        }
                }
        }
+
+       if (tmp4 > 0) {
+               for (i = 0; i <= lasti; ++i) {
+                       if (!(write_mask & (1<<i)))
+                               continue;
+
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ALU_OP1_MOV;
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                       alu.src[0].sel = tmp4;
+                       alu.src[0].chan = i;
+
+                       if (i == lasti)
+                               alu.last = 1;
+                       if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+                               return r;
+               }
+       }
+
        return 0;
 }
 

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to