On 15.02.2016 18:59, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

It only exports the primitive ID.
Also used by TES when it's compiled as VS.

The VS input location of the primitive ID input is v2.

So the reason for having two unused outputs/return values in the main VS is so that the primitive ID can be passed through without any moves? Sounds good, but it may be worth documenting this, e.g. where VS_EPILOG_PRIMID_LOC is defined.

Nicolai

---
  src/gallium/drivers/radeonsi/si_pipe.c   |   2 +-
  src/gallium/drivers/radeonsi/si_pipe.h   |   1 +
  src/gallium/drivers/radeonsi/si_shader.c | 172 +++++++++++++++++++++++++++++--
  src/gallium/drivers/radeonsi/si_shader.h |   4 +
  4 files changed, 168 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 7ce9570..2b5ce3a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -539,7 +539,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
        struct si_screen *sscreen = (struct si_screen *)pscreen;
        struct si_shader_part *parts[] = {
                sscreen->vs_prologs,
-               /* this will be filled with other shader parts */
+               sscreen->vs_epilogs,
        };
        unsigned i;

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index f4bafc2..8d98779 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -90,6 +90,7 @@ struct si_screen {

        pipe_mutex                      shader_parts_mutex;
        struct si_shader_part           *vs_prologs;
+       struct si_shader_part           *vs_epilogs;
  };

  struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index fbb8394..0085c43 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -129,6 +129,7 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
                               LLVMTargetMachineRef tm,
                               struct tgsi_shader_info *info);

+#define VS_EPILOG_PRIMID_LOC 2

  #define PERSPECTIVE_BASE 0
  #define LINEAR_BASE 9
@@ -2230,16 +2231,26 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
                                              "");
        }

-       /* Export PrimitiveID when PS needs it. */
-       if (si_vs_exports_prim_id(ctx->shader)) {
-               outputs[i].name = TGSI_SEMANTIC_PRIMID;
-               outputs[i].sid = 0;
-               outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
-                                              get_primitive_id(bld_base, 0));
-               outputs[i].values[1] = bld_base->base.undef;
-               outputs[i].values[2] = bld_base->base.undef;
-               outputs[i].values[3] = bld_base->base.undef;
-               i++;
+       if (ctx->is_monolithic) {
+               /* Export PrimitiveID when PS needs it. */
+               if (si_vs_exports_prim_id(ctx->shader)) {
+                       outputs[i].name = TGSI_SEMANTIC_PRIMID;
+                       outputs[i].sid = 0;
+                       outputs[i].values[0] = bitcast(bld_base, 
TGSI_TYPE_FLOAT,
+                                                      
get_primitive_id(bld_base, 0));
+                       outputs[i].values[1] = bld_base->base.undef;
+                       outputs[i].values[2] = bld_base->base.undef;
+                       outputs[i].values[3] = bld_base->base.undef;
+                       i++;
+               }
+       } else {
+               /* Return the primitive ID from the LLVM function. */
+               ctx->return_value =
+                       LLVMBuildInsertValue(gallivm->builder,
+                                            ctx->return_value,
+                                            bitcast(bld_base, TGSI_TYPE_FLOAT,
+                                                    get_primitive_id(bld_base, 
0)),
+                                            VS_EPILOG_PRIMID_LOC, "");
        }

        si_llvm_export_vs(bld_base, outputs, i);
@@ -3724,6 +3735,11 @@ static void create_function(struct si_shader_context *ctx)

                        for (i = 0; i < shader->selector->info.num_inputs; i++)
                                params[num_params++] = ctx->i32;
+
+                       /* PrimitiveID output. */
+                       if (!shader->key.vs.as_es && !shader->key.vs.as_ls)
+                               for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
+                                       returns[num_returns++] = ctx->f32;
                }
                break;

@@ -3758,6 +3774,11 @@ static void create_function(struct si_shader_context *ctx)
                params[ctx->param_tes_v = num_params++] = ctx->f32;
                params[ctx->param_tes_rel_patch_id = num_params++] = ctx->i32;
                params[ctx->param_tes_patch_id = num_params++] = ctx->i32;
+
+               /* PrimitiveID output. */
+               if (!ctx->is_monolithic && !shader->key.tes.as_es)
+                       for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
+                               returns[num_returns++] = ctx->f32;
                break;

        case TGSI_PROCESSOR_GEOMETRY:
@@ -4856,6 +4877,111 @@ static bool si_compile_vs_prolog(struct si_screen *sscreen,
        return status;
  }

+/**
+ * Compile the vertex shader epilog. This is also used by the tessellation
+ * evaluation shader compiled as VS.
+ *
+ * The input is PrimitiveID.
+ *
+ * If PrimitiveID is required by the pixel shader, export it.
+ * Otherwise, do nothing.
+ */
+static bool si_compile_vs_epilog(struct si_screen *sscreen,
+                                LLVMTargetMachineRef tm,
+                                struct pipe_debug_callback *debug,
+                                struct si_shader_part *out)
+{
+       union si_shader_part_key *key = &out->key;
+       struct si_shader_context ctx;
+       struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+       struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
+       LLVMTypeRef params[5];
+       int num_params, i;
+       bool status = true;
+
+       si_init_shader_ctx(&ctx, sscreen, NULL, tm, NULL);
+       ctx.type = TGSI_PROCESSOR_VERTEX;
+
+       /* Declare input VGPRs. */
+       num_params = key->vs_epilog.states.export_prim_id ?
+                          (VS_EPILOG_PRIMID_LOC + 1) : 0;
+       assert(num_params <= ARRAY_SIZE(params));
+
+       for (i = 0; i < num_params; i++)
+               params[i] = ctx.f32;
+
+       /* Create the function. */
+       si_create_function(&ctx, NULL, 0, params, num_params,
+                          -1, -1);
+
+       /* Emit exports. */
+       if (key->vs_epilog.states.export_prim_id) {
+               struct lp_build_context *base = &bld_base->base;
+               struct lp_build_context *uint = &bld_base->uint_bld;
+               LLVMValueRef args[9];
+
+               args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled 
channels */
+               args[1] = uint->zero; /* whether the EXEC mask is valid */
+               args[2] = uint->zero; /* DONE bit */
+               args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_PARAM +
+                                              
key->vs_epilog.prim_id_param_offset);
+               args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
+               args[5] = LLVMGetParam(ctx.radeon_bld.main_fn,
+                                      VS_EPILOG_PRIMID_LOC); /* X */
+               args[6] = uint->undef; /* Y */
+               args[7] = uint->undef; /* Z */
+               args[8] = uint->undef; /* W */
+
+               lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+                                  
LLVMVoidTypeInContext(base->gallivm->context),
+                                  args, 9, 0);
+       }
+
+       /* Compile. */
+       LLVMBuildRet(gallivm->builder, ctx.return_value);
+       radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+       if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+                           gallivm->module, debug, ctx.type,
+                           "Vertex Shader Epilog"))
+               status = false;
+
+       radeon_llvm_dispose(&ctx.radeon_bld);
+       return status;
+}
+
+/**
+ * Create & compile a vertex shader epilog. This is a helper used by VS and TES.
+ */
+static bool si_get_vs_epilog(struct si_screen *sscreen,
+                            LLVMTargetMachineRef tm,
+                            struct si_shader *shader,
+                            struct pipe_debug_callback *debug,
+                            struct si_vs_epilog_bits *states)
+{
+       union si_shader_part_key epilog_key;
+
+       memset(&epilog_key, 0, sizeof(epilog_key));
+       epilog_key.vs_epilog.states = *states;
+
+       /* Set up the PrimitiveID output. */
+       if (shader->key.vs.epilog.export_prim_id) {
+               unsigned index = shader->selector->info.num_outputs;
+               unsigned offset = shader->nr_param_exports++;
+
+               epilog_key.vs_epilog.prim_id_param_offset = offset;
+               shader->vs_output_param_offset[index] = offset;
+       }
+
+       shader->epilog = si_get_shader_part(sscreen, &sscreen->vs_epilogs,
+                                           &epilog_key, tm, debug,
+                                           si_compile_vs_epilog);
+       return shader->epilog != NULL;
+}
+
+/**
+ * Select and compile (or reuse) vertex shader parts (prolog & epilog).
+ */
  static bool si_shader_select_vs_parts(struct si_screen *sscreen,
                                      LLVMTargetMachineRef tm,
                                      struct si_shader *shader,
@@ -4881,6 +5007,12 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen,
                        return false;
        }

+       /* Get the epilog. */
+       if (!shader->key.vs.as_es && !shader->key.vs.as_ls &&
+           !si_get_vs_epilog(sscreen, tm, shader, debug,
+                             &shader->key.vs.epilog))
+               return false;
+
        /* Set the instanceID flag. */
        for (i = 0; i < info->num_inputs; i++)
                if (prolog_key.vs_prolog.states.instance_divisors[i])
@@ -4889,6 +5021,22 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen,
        return true;
  }

+/**
+ * Select and compile (or reuse) TES parts (epilog).
+ */
+static bool si_shader_select_tes_parts(struct si_screen *sscreen,
+                                      LLVMTargetMachineRef tm,
+                                      struct si_shader *shader,
+                                      struct pipe_debug_callback *debug)
+{
+       if (shader->key.tes.as_es)
+               return true;
+
+       /* TES compiled as VS. */
+       return si_get_vs_epilog(sscreen, tm, shader, debug,
+                               &shader->key.tes.epilog);
+}
+
  int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug)
@@ -4907,6 +5055,10 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                        if (!si_shader_select_vs_parts(sscreen, tm, shader, 
debug))
                                return -1;
                        break;
+               case PIPE_SHADER_TESS_EVAL:
+                       if (!si_shader_select_tes_parts(sscreen, tm, shader, 
debug))
+                               return -1;
+                       break;
                }

                /* Update SGPR and VGPR counts. */
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index e3ba4c7..d7388a4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -274,6 +274,10 @@ union si_shader_part_key {
                unsigned        num_input_sgprs:5;
                unsigned        last_input:4;
        } vs_prolog;
+       struct {
+               struct si_vs_epilog_bits states;
+               unsigned        prim_id_param_offset:5;
+       } vs_epilog;
  };

  union si_shader_key {

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to