Mesa (master): r600: implement callstack workaround for evergreen.

Dave Airlie Sun, 11 Mar 2018 18:24:06 -0700

Module: Mesa
Branch: master
Commit: 5d4fbc2b54cb2aaea1cbb52ec087f31009f3ac76
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d4fbc2b54cb2aaea1cbb52ec087f31009f3ac76


Author: Dave Airlie <airl...@redhat.com>
Date:   Fri Mar  9 16:03:53 2018 +1000

r600: implement callstack workaround for evergreen.

This is ported from the sb backend. Evergreen has some issues with
ALU_PUSH_BEFORE instructions when the stack usage is on the boundary
between entries.

Whenever we are about to use ALU_PUSH_BEFORE, we check the stack
usage, and if the workaround is needed, we switch to a separate
PUSH followed by a plain ALU clause.

I noticed this problem while dealing with some of the soft fp64
shaders; in nosb mode, they are quite stack happy.

This fixes all the glitches and inconsistencies I've seen with them.

Reviewed-by: Roland Scheidegger <srol...@vmware.com>
Tested-by: Elie Tournier <elie.tourn...@collabora.com>
Cc: <mesa-sta...@lists.freedesktop.org>
Signed-off-by: Dave Airlie <airl...@redhat.com>

---

 src/gallium/drivers/r600/r600_shader.c | 39 +++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 4b44f66141..6b5c42f86d 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -377,7 +377,7 @@ struct r600_shader_tgsi_instruction {
 static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
 static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
-static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
+static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
 static int tgsi_else(struct r600_shader_ctx *ctx);
 static int tgsi_endif(struct r600_shader_ctx *ctx);
@@ -393,6 +393,15 @@ static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
 static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
                               unsigned dst_reg, unsigned mask);
 
+static bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx)
+{
+       if (ctx->bc->family == CHIP_HEMLOCK ||
+           ctx->bc->family == CHIP_CYPRESS ||
+           ctx->bc->family == CHIP_JUNIPER)
+               return false;
+       return true;
+}
+
 static int tgsi_last_instruction(unsigned writemask)
 {
        int i, lasti = 0;
@@ -10168,7 +10177,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
        return 0;
 }
 
-static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
+static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx,
                                               unsigned reason)
 {
        struct r600_stack_info *stack = &ctx->bc->stack;
@@ -10186,7 +10195,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
                /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
                 * the stack must be reserved to hold the current active/continue
                 * masks */
-               if (reason == FC_PUSH_VPM) {
+               if (reason == FC_PUSH_VPM || stack->push > 0) {
                        elements += 2;
                }
                break;
@@ -10212,7 +10221,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
                 *    NOTE: it seems we also need to reserve additional element in some
                 *    other cases, e.g. when we have 4 levels of PUSH_VPM in the shader,
                 *    then STACK_SIZE should be 2 instead of 1 */
-               if (reason == FC_PUSH_VPM) {
+               if (reason == FC_PUSH_VPM || stack->push > 0) {
                        elements += 1;
                }
                break;
@@ -10231,6 +10240,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
 
        if (entries > stack->max_entries)
                stack->max_entries = entries;
+       return elements;
 }
 
 static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
@@ -10254,7 +10264,7 @@ static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
        }
 }
 
-static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
+static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
 {
        switch (reason) {
        case FC_PUSH_VPM:
@@ -10262,6 +10272,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
                break;
        case FC_PUSH_WQM:
                ++ctx->bc->stack.push_wqm;
+               break;
        case FC_LOOP:
                ++ctx->bc->stack.loop;
                break;
@@ -10269,7 +10280,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
                assert(0);
        }
 
-       callstack_update_max_depth(ctx, reason);
+       return callstack_update_max_depth(ctx, reason);
 }
 
 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
@@ -10353,12 +10364,25 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode,
                   struct r600_bytecode_alu_src *src)
 {
        int alu_type = CF_OP_ALU_PUSH_BEFORE;
+       bool needs_workaround = false;
+       int elems = callstack_push(ctx, FC_PUSH_VPM);
+
+       if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1)
+               needs_workaround = true;
+
+       if (ctx->bc->chip_class == EVERGREEN && ctx_needs_stack_workaround_8xx(ctx)) {
+               unsigned dmod1 = (elems - 1) % ctx->bc->stack.entry_size;
+               unsigned dmod2 = (elems) % ctx->bc->stack.entry_size;
+
+               if (elems && (!dmod1 || !dmod2))
+                       needs_workaround = true;
+       }
 
        /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
         * LOOP_STARTxxx for nested loops may put the branch stack into a state
         * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this
         * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
-       if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
+       if (needs_workaround) {
                r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
                ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
                alu_type = CF_OP_ALU;
@@ -10370,7 +10394,6 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode,
 
        fc_pushlevel(ctx, FC_IF);
 
-       callstack_push(ctx, FC_PUSH_VPM);
        return 0;
 }
 

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): r600: implement callstack workaround for evergreen.

Reply via email to