On 10.05.2016 05:52, Bas Nieuwenhuizen wrote:
We need to copy the VS outputs to memory. I decided to do this
using a shader key, as the value depends on other shaders.

I also switch the fixed-function TCS over to monolithic, as
otherwise many of the user SGPRs would need to be passed to the
epilog, which either increases register pressure or adds
complexity to avoid that. The main body of the fixed-function TCS
is not that interesting to precompile anyway, since we do it on
demand and it is very small.

Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
---
  src/gallium/drivers/radeonsi/si_shader.c        | 45 +++++++++++++++++++++++++
  src/gallium/drivers/radeonsi/si_shader.h        |  1 +
  src/gallium/drivers/radeonsi/si_state_shaders.c |  3 ++
  3 files changed, 49 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 90830ee..50c48bf 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2423,6 +2423,48 @@ handle_semantic:
        }
  }

+static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       LLVMValueRef invocation_id, rw_buffers, buffer, buffer_offset;
+       LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base;
+       unsigned num_outputs, i;
+
+       invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
+
+       rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_RW_BUFFERS);
+       buffer = build_indexed_load_const(ctx, rw_buffers,
+                       lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP));
+
+       buffer_offset = LLVMGetParam(ctx->radeon_bld.main_fn, 
ctx->param_oc_lds);
+
+       lds_vertex_stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
+       lds_vertex_offset = LLVMBuildMul(gallivm->builder, invocation_id,
+                                        lds_vertex_stride, "");
+       lds_base = get_tcs_in_current_patch_offset(ctx);
+       lds_base = LLVMBuildAdd(gallivm->builder, lds_base, lds_vertex_offset, 
"");
+
+       num_outputs = 
util_last_bit64(ctx->shader->key.tcs.epilog.inputs_to_copy);
+       for (i = 0; i < num_outputs; i++) {
+               if (!((1llu << i) & ctx->shader->key.tcs.epilog.inputs_to_copy))
+                       continue;

Please use u_bit_scan64 to iterate over the set bits of inputs_to_copy
instead of testing every bit position in the loop.

Nicolai

+
+               LLVMValueRef lds_ptr = LLVMBuildAdd(gallivm->builder, lds_base,
+                                           lp_build_const_int32(gallivm, 4 * 
i),
+                                            "");
+
+               LLVMValueRef buffer_addr = get_buffer_address(ctx, 
invocation_id,
+                                             lp_build_const_int32(gallivm, i));
+
+               LLVMValueRef value = lds_load(bld_base, TGSI_TYPE_SIGNED, ~0,
+                                             lds_ptr);
+
+               build_tbuffer_store_dwords(ctx, buffer, value, 4, buffer_addr,
+                                          buffer_offset, 0);
+       }
+}
+
  static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
                                  LLVMValueRef rel_patch_id,
                                  LLVMValueRef invocation_id,
@@ -2564,6 +2606,7 @@ static void si_llvm_emit_tcs_epilogue(struct 
lp_build_tgsi_context *bld_base)
                return;
        }

+       si_copy_tcs_inputs(bld_base);
        si_write_tess_factors(bld_base, rel_patch_id, invocation_id, 
tf_lds_offset);
  }

@@ -7374,6 +7417,8 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
              shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
            (shader->selector->type == PIPE_SHADER_TESS_EVAL &&
             shader->key.tes.as_es != mainp->key.tes.as_es) ||
+           (shader->selector->type == PIPE_SHADER_TESS_CTRL &&
+            shader->key.tcs.epilog.inputs_to_copy) ||
            shader->selector->type == PIPE_SHADER_COMPUTE) {
                /* Monolithic shader (compiled as a whole, has many variants,
                 * may take a long time to compile).
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 26be25e..67b457b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -304,6 +304,7 @@ struct si_vs_epilog_bits {
  /* Common TCS bits between the shader key and the epilog key. */
  struct si_tcs_epilog_bits {
        unsigned        prim_mode:3;
+       uint64_t        inputs_to_copy;
  };

  /* Common PS bits between the shader key and the prolog key. */
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 32ac95d..f48582a 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -841,6 +841,9 @@ static inline void si_shader_selector_key(struct 
pipe_context *ctx,
        case PIPE_SHADER_TESS_CTRL:
                key->tcs.epilog.prim_mode =
                        
sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+
+               if (sel == sctx->fixed_func_tcs_shader.cso)
+                       key->tcs.epilog.inputs_to_copy = 
sctx->vs_shader.cso->outputs_written;
                break;
        case PIPE_SHADER_TESS_EVAL:
                if (sctx->gs_shader.cso)

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to