On 15.02.2016 18:59, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/radeonsi/si_pipe.c   |   1 +
  src/gallium/drivers/radeonsi/si_pipe.h   |   1 +
  src/gallium/drivers/radeonsi/si_shader.c | 163 ++++++++++++++++++++++++++++---
  src/gallium/drivers/radeonsi/si_shader.h |   3 +
  4 files changed, 155 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 2b5ce3a..645d418 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -540,6 +540,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
        struct si_shader_part *parts[] = {
                sscreen->vs_prologs,
                sscreen->vs_epilogs,
+               sscreen->tcs_epilogs,
        };
        unsigned i;

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 8d98779..d9175b9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -91,6 +91,7 @@ struct si_screen {
        pipe_mutex                      shader_parts_mutex;
        struct si_shader_part           *vs_prologs;
        struct si_shader_part           *vs_epilogs;
+       struct si_shader_part           *tcs_epilogs;
  };

  struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 0085c43..bc6f8cd 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -109,9 +109,11 @@ struct si_shader_context
        LLVMTypeRef i1;
        LLVMTypeRef i8;
        LLVMTypeRef i32;
+       LLVMTypeRef i64;
        LLVMTypeRef i128;
        LLVMTypeRef f32;
        LLVMTypeRef v16i8;
+       LLVMTypeRef v2i32;
        LLVMTypeRef v4i32;
        LLVMTypeRef v4f32;
        LLVMTypeRef v8i32;
@@ -2078,14 +2080,51 @@ static void si_write_tess_factors(struct 
lp_build_tgsi_context *bld_base,
  static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
  {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef invocation_id;
+       LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;

+       rel_patch_id = get_rel_patch_id(ctx);
        invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
+       tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);

-       si_write_tess_factors(bld_base,
-                             get_rel_patch_id(ctx),
-                             invocation_id,
-                             get_tcs_out_current_patch_data_offset(ctx));
+       if (!ctx->is_monolithic) {
+               /* Return epilog parameters from this function. */
+               LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+               LLVMValueRef ret = ctx->return_value;
+               LLVMValueRef rw_buffers, rw0, rw1, tf_soffset;
+               unsigned vgpr;
+
+               /* RW_BUFFERS pointer */
+               rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn,
+                                         SI_PARAM_RW_BUFFERS);
+               rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers, ctx->i64, 
"");
+               rw_buffers = LLVMBuildBitCast(builder, rw_buffers, ctx->v2i32, 
"");
+               rw0 = LLVMBuildExtractElement(builder, rw_buffers,
+                                             bld_base->uint_bld.zero, "");
+               rw1 = LLVMBuildExtractElement(builder, rw_buffers,
+                                             bld_base->uint_bld.one, "");
+               ret = LLVMBuildInsertValue(builder, ret, rw0, 0, "");
+               ret = LLVMBuildInsertValue(builder, ret, rw1, 1, "");

Ugh, that's a bit ugly even if it ends up being a no-op in the final binary. Doesn't LLVM at least support vector return values or maybe even i64?

Nicolai

+               /* Tess factor buffer soffset is after user SGPRs. */
+               tf_soffset = LLVMGetParam(ctx->radeon_bld.main_fn,
+                                         SI_PARAM_TESS_FACTOR_OFFSET);
+               ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
+                                          SI_TCS_NUM_USER_SGPR, "");
+
+               /* VGPRs */
+               rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
+               invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, 
invocation_id);
+               tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, 
tf_lds_offset);
+
+               vgpr = SI_TCS_NUM_USER_SGPR + 1;
+               ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, 
"");
+               ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, 
"");
+               ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, 
"");
+               ctx->return_value = ret;
+               return;
+       }
+
+       si_write_tess_factors(bld_base, rel_patch_id, invocation_id, 
tf_lds_offset);
  }

  static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
@@ -3679,12 +3718,11 @@ static void create_function(struct si_shader_context 
*ctx)
        struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        struct si_shader *shader = ctx->shader;
-       LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v2i32, v3i32;
+       LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32;
        LLVMTypeRef returns[16+32*4];
        unsigned i, last_array_pointer, last_sgpr, num_params;
        unsigned num_returns = 0;

-       v2i32 = LLVMVectorType(ctx->i32, 2);
        v3i32 = LLVMVectorType(ctx->i32, 3);

        params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, 
SI_NUM_RW_BUFFERS);
@@ -3754,6 +3792,15 @@ static void create_function(struct si_shader_context 
*ctx)
                params[SI_PARAM_PATCH_ID] = ctx->i32;
                params[SI_PARAM_REL_IDS] = ctx->i32;
                num_params = SI_PARAM_REL_IDS+1;
+
+               if (!ctx->is_monolithic) {
+                       /* PARAM_TESS_FACTOR_OFFSET is after user SGPRs. */
+                       for (i = 0; i <= SI_TCS_NUM_USER_SGPR; i++)
+                               returns[num_returns++] = ctx->i32; /* SGPRs */
+
+                       for (i = 0; i < 3; i++)
+                               returns[num_returns++] = ctx->f32; /* VGPRs */
+               }
                break;

        case TGSI_PROCESSOR_TESS_EVAL:
@@ -3802,13 +3849,13 @@ static void create_function(struct si_shader_context 
*ctx)
                params[SI_PARAM_ALPHA_REF] = ctx->f32;
                params[SI_PARAM_PRIM_MASK] = ctx->i32;
                last_sgpr = SI_PARAM_PRIM_MASK;
-               params[SI_PARAM_PERSP_SAMPLE] = v2i32;
-               params[SI_PARAM_PERSP_CENTER] = v2i32;
-               params[SI_PARAM_PERSP_CENTROID] = v2i32;
+               params[SI_PARAM_PERSP_SAMPLE] = ctx->v2i32;
+               params[SI_PARAM_PERSP_CENTER] = ctx->v2i32;
+               params[SI_PARAM_PERSP_CENTROID] = ctx->v2i32;
                params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
-               params[SI_PARAM_LINEAR_SAMPLE] = v2i32;
-               params[SI_PARAM_LINEAR_CENTER] = v2i32;
-               params[SI_PARAM_LINEAR_CENTROID] = v2i32;
+               params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32;
+               params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32;
+               params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32;
                params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32;
                params[SI_PARAM_POS_X_FLOAT] = ctx->f32;
                params[SI_PARAM_POS_Y_FLOAT] = ctx->f32;
@@ -4491,9 +4538,11 @@ static void si_init_shader_ctx(struct si_shader_context 
*ctx,
        ctx->i1 = LLVMInt1TypeInContext(ctx->radeon_bld.gallivm.context);
        ctx->i8 = LLVMInt8TypeInContext(ctx->radeon_bld.gallivm.context);
        ctx->i32 = LLVMInt32TypeInContext(ctx->radeon_bld.gallivm.context);
+       ctx->i64 = LLVMInt64TypeInContext(ctx->radeon_bld.gallivm.context);
        ctx->i128 = LLVMIntTypeInContext(ctx->radeon_bld.gallivm.context, 128);
        ctx->f32 = LLVMFloatTypeInContext(ctx->radeon_bld.gallivm.context);
        ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
+       ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
        ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
        ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
        ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
@@ -5037,6 +5086,90 @@ static bool si_shader_select_tes_parts(struct si_screen 
*sscreen,
                                &shader->key.tes.epilog);
  }

+/**
+ * Compile the TCS epilog. This writes tesselation factors to memory based on
+ * the output primitive type of the tesselator (determined by TES).
+ */
+static bool si_compile_tcs_epilog(struct si_screen *sscreen,
+                                 LLVMTargetMachineRef tm,
+                                 struct pipe_debug_callback *debug,
+                                 struct si_shader_part *out)
+{
+       union si_shader_part_key *key = &out->key;
+       struct si_shader shader = {};
+       struct si_shader_context ctx;
+       struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+       struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
+       LLVMTypeRef params[16];
+       LLVMValueRef func;
+       int last_array_pointer, last_sgpr, num_params;
+       bool status = true;
+
+       si_init_shader_ctx(&ctx, sscreen, &shader, tm, NULL);
+       ctx.type = TGSI_PROCESSOR_TESS_CTRL;
+       shader.key.tcs.epilog = key->tcs_epilog.states;
+
+       /* Declare inputs. Only RW_BUFFERS and TESS_FACTOR_OFFSET are used. */
+       params[SI_PARAM_RW_BUFFERS] = const_array(ctx.v16i8, SI_NUM_RW_BUFFERS);
+       last_array_pointer = SI_PARAM_RW_BUFFERS;
+       params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
+       params[SI_PARAM_SAMPLERS] = ctx.i64;
+       params[SI_PARAM_UNUSED] = ctx.i64;
+       params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
+       params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
+       params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
+       params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx.i32;
+       last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
+       num_params = last_sgpr + 1;
+
+       params[num_params++] = ctx.i32; /* patch index within the wave 
(REL_PATCH_ID) */
+       params[num_params++] = ctx.i32; /* invocation ID within the patch */
+       params[num_params++] = ctx.i32; /* LDS offset where tess factors should 
be loaded from */
+
+       /* Create the function. */
+       si_create_function(&ctx, NULL, 0, params, num_params,
+                          last_array_pointer, last_sgpr);
+       declare_tess_lds(&ctx);
+       func = ctx.radeon_bld.main_fn;
+
+       si_write_tess_factors(bld_base,
+                             LLVMGetParam(func, last_sgpr + 1),
+                             LLVMGetParam(func, last_sgpr + 2),
+                             LLVMGetParam(func, last_sgpr + 3));
+
+       /* Compile. */
+       LLVMBuildRet(gallivm->builder, ctx.return_value);
+       radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+       if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+                           gallivm->module, debug, ctx.type,
+                           "Tessellation Control Shader Epilog"))
+               status = false;
+
+       radeon_llvm_dispose(&ctx.radeon_bld);
+       return status;
+}
+
+/**
+ * Select and compile (or reuse) TCS parts (epilog).
+ */
+static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
+                                      LLVMTargetMachineRef tm,
+                                      struct si_shader *shader,
+                                      struct pipe_debug_callback *debug)
+{
+       union si_shader_part_key epilog_key;
+
+       /* Get the epilog. */
+       memset(&epilog_key, 0, sizeof(epilog_key));
+       epilog_key.tcs_epilog.states = shader->key.tcs.epilog;
+
+       shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs,
+                                           &epilog_key, tm, debug,
+                                           si_compile_tcs_epilog);
+       return shader->epilog != NULL;
+}
+
  int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug)
@@ -5055,6 +5188,10 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
                        if (!si_shader_select_vs_parts(sscreen, tm, shader, 
debug))
                                return -1;
                        break;
+               case PIPE_SHADER_TESS_CTRL:
+                       if (!si_shader_select_tcs_parts(sscreen, tm, shader, 
debug))
+                               return -1;
+                       break;
                case PIPE_SHADER_TESS_EVAL:
                        if (!si_shader_select_tes_parts(sscreen, tm, shader, 
debug))
                                return -1;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index d7388a4..e2b9534 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -278,6 +278,9 @@ union si_shader_part_key {
                struct si_vs_epilog_bits states;
                unsigned        prim_id_param_offset:5;
        } vs_epilog;
+       struct {
+               struct si_tcs_epilog_bits states;
+       } tcs_epilog;
  };

  union si_shader_key {

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to