Module: Mesa
Branch: master
Commit: f85488ab827412114f2cb4ff9ee54aafd751454d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f85488ab827412114f2cb4ff9ee54aafd751454d

Author: Mike Blumenkrantz <[email protected]>
Date:   Tue Jun 30 15:10:12 2020 -0400

zink: redo slot mapping again for the last time really I mean it

now that shader compiling is happening all at once, we can store the slot
map on zink_gfx_program directly and reserve it dynamically in order to
use up only the slots that are actually being used across all shader stages

Reviewed-by: Erik Faye-Lund <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7100>

---

 .../drivers/zink/nir_to_spirv/nir_to_spirv.c       | 117 +++++----------------
 .../drivers/zink/nir_to_spirv/nir_to_spirv.h       |   3 +-
 src/gallium/drivers/zink/zink_compiler.c           |   5 +-
 src/gallium/drivers/zink/zink_compiler.h           |   3 +-
 src/gallium/drivers/zink/zink_program.c            |  28 ++++-
 src/gallium/drivers/zink/zink_program.h            |   3 +
 6 files changed, 64 insertions(+), 95 deletions(-)

diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
index bd9655e319d..b00ad603700 100644
--- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
+++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
@@ -29,65 +29,7 @@
 #include "util/u_memory.h"
 #include "util/hash_table.h"
 
-/* this consistently maps slots to a zero-indexed value to avoid wasting slots */
-static unsigned slot_pack_map[] = {
-   /* Position is builtin */
-   [VARYING_SLOT_POS] = UINT_MAX,
-   [VARYING_SLOT_COL0] = 0, /* input/output */
-   [VARYING_SLOT_COL1] = 1, /* input/output */
-   [VARYING_SLOT_FOGC] = 2, /* input/output */
-   /* TEX0-7 are deprecated, so we put them at the end of the range and hope nobody uses them all */
-   [VARYING_SLOT_TEX0] = VARYING_SLOT_VAR0 - 1, /* input/output */
-   [VARYING_SLOT_TEX1] = VARYING_SLOT_VAR0 - 2,
-   [VARYING_SLOT_TEX2] = VARYING_SLOT_VAR0 - 3,
-   [VARYING_SLOT_TEX3] = VARYING_SLOT_VAR0 - 4,
-   [VARYING_SLOT_TEX4] = VARYING_SLOT_VAR0 - 5,
-   [VARYING_SLOT_TEX5] = VARYING_SLOT_VAR0 - 6,
-   [VARYING_SLOT_TEX6] = VARYING_SLOT_VAR0 - 7,
-   [VARYING_SLOT_TEX7] = VARYING_SLOT_VAR0 - 8,
-
-   /* PointSize is builtin */
-   [VARYING_SLOT_PSIZ] = UINT_MAX,
-
-   [VARYING_SLOT_BFC0] = 3, /* output only */
-   [VARYING_SLOT_BFC1] = 4, /* output only */
-   [VARYING_SLOT_EDGE] = 5, /* output only */
-   [VARYING_SLOT_CLIP_VERTEX] = 6, /* output only */
-
-   /* ClipDistance is builtin */
-   [VARYING_SLOT_CLIP_DIST0] = UINT_MAX,
-   [VARYING_SLOT_CLIP_DIST1] = UINT_MAX,
-
-   /* CullDistance is builtin */
-   [VARYING_SLOT_CULL_DIST0] = UINT_MAX, /* input/output */
-   [VARYING_SLOT_CULL_DIST1] = UINT_MAX, /* never actually used */
-
-   /* PrimitiveId is builtin */
-   [VARYING_SLOT_PRIMITIVE_ID] = UINT_MAX,
-
-   /* Layer is builtin */
-   [VARYING_SLOT_LAYER] = UINT_MAX, /* input/output */
-
-   /* ViewportIndex is builtin */
-   [VARYING_SLOT_VIEWPORT] =  UINT_MAX, /* input/output */
-
-   /* FrontFacing is builtin */
-   [VARYING_SLOT_FACE] = UINT_MAX,
-
-   /* PointCoord is builtin */
-   [VARYING_SLOT_PNTC] = UINT_MAX, /* input only */
-
-   /* TessLevelOuter is builtin */
-   [VARYING_SLOT_TESS_LEVEL_OUTER] = UINT_MAX,
-   /* TessLevelInner is builtin */
-   [VARYING_SLOT_TESS_LEVEL_INNER] = UINT_MAX,
-
-   [VARYING_SLOT_BOUNDING_BOX0] = 7, /* Only appears as TCS output. */
-   [VARYING_SLOT_BOUNDING_BOX1] = 8, /* Only appears as TCS output. */
-   [VARYING_SLOT_VIEW_INDEX] = 9, /* input/output */
-   [VARYING_SLOT_VIEWPORT_MASK] = 10, /* output only */
-};
-#define NTV_MIN_RESERVED_SLOTS 11
+#define SLOT_UNSET ((unsigned char) -1)
 
 struct ntv_context {
    void *mem_ctx;
@@ -123,10 +65,10 @@ struct ntv_context {
    bool block_started;
    SpvId loop_break, loop_cont;
 
+   unsigned char *shader_slot_map;
+   unsigned char shader_slots_reserved;
+
    SpvId front_face_var, instance_id_var, vertex_id_var;
-#ifndef NDEBUG
-   bool seen_texcoord[8]; //whether we've seen a VARYING_SLOT_TEX[n] this pass
-#endif
 };
 
 static SpvId
@@ -295,25 +237,24 @@ get_glsl_type(struct ntv_context *ctx, const struct glsl_type *type)
    unreachable("we shouldn't get here, I think...");
 }
 
+static inline unsigned char
+reserve_slot(struct ntv_context *ctx)
+{
+   /* TODO: this should actually be clamped to the limits value as in the table
+    * in 14.1.4 of the vulkan spec, though there's not really any recourse
+    * other than aborting if we do hit it...
+    */
+   assert(ctx->shader_slots_reserved < MAX_VARYING);
+   return ctx->shader_slots_reserved++;
+}
+
 static inline unsigned
 handle_slot(struct ntv_context *ctx, unsigned slot)
 {
-   unsigned orig = slot;
-   if (slot < VARYING_SLOT_VAR0) {
-#ifndef NDEBUG
-      if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7)
-         ctx->seen_texcoord[slot - VARYING_SLOT_TEX0] = true;
-#endif
-      slot = slot_pack_map[slot];
-      if (slot == UINT_MAX)
-         debug_printf("unhandled varying slot: %s\n", gl_varying_slot_name(orig));
-   } else {
-      slot -= VARYING_SLOT_VAR0 - NTV_MIN_RESERVED_SLOTS;
-      assert(slot <= VARYING_SLOT_VAR0 - 8 ||
-             !ctx->seen_texcoord[VARYING_SLOT_VAR0 - slot - 1]);
-
-   }
-   assert(slot < VARYING_SLOT_VAR0);
+   if (ctx->shader_slot_map[slot] == SLOT_UNSET)
+      ctx->shader_slot_map[slot] = reserve_slot(ctx);
+   slot = ctx->shader_slot_map[slot];
+   assert(slot < MAX_VARYING);
    return slot;
 }
 
@@ -901,8 +842,7 @@ get_output_type(struct ntv_context *ctx, unsigned register_index, unsigned num_c
 /* for streamout create new outputs, as streamout can be done on individual components,
    from complete outputs, so we just can't use the created packed outputs */
 static void
-emit_so_info(struct ntv_context *ctx, unsigned max_output_location,
-             const struct zink_so_info *so_info)
+emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info)
 {
    for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) {
       struct pipe_stream_output so_output = so_info->so_info.output[i];
@@ -924,16 +864,9 @@ emit_so_info(struct ntv_context *ctx, unsigned max_output_location,
       /* output location is incremented by VARYING_SLOT_VAR0 for non-builtins in vtn,
        * so we need to ensure that the new xfb location slot doesn't conflict with any previously-emitted
        * outputs.
-       *
-       * if there's no previous outputs that take up user slots (VAR0+) then we can start right after the
-       * glsl builtin reserved slots, otherwise we start just after the adjusted user output slot
        */
-      uint32_t location = NTV_MIN_RESERVED_SLOTS + i;
-      if (max_output_location >= VARYING_SLOT_VAR0)
-         location = max_output_location - VARYING_SLOT_VAR0 + 1 + i;
+      uint32_t location = reserve_slot(ctx);
       assert(location < VARYING_SLOT_VAR0);
-      assert(location <= VARYING_SLOT_VAR0 - 8 ||
-             !ctx->seen_texcoord[VARYING_SLOT_VAR0 - location - 1]);
       spirv_builder_emit_location(&ctx->builder, var_id, location);
 
       /* note: gl_ClipDistance[4] can the 0-indexed member of VARYING_SLOT_CLIP_DIST1 here,
@@ -2236,7 +2169,8 @@ emit_cf_list(struct ntv_context *ctx, struct exec_list *list)
 }
 
 struct spirv_shader *
-nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
+nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info,
+             unsigned char *shader_slot_map, unsigned char *shader_slots_reserved)
 {
    struct spirv_shader *ret = NULL;
 
@@ -2279,6 +2213,8 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
    }
 
    ctx.stage = s->info.stage;
+   ctx.shader_slot_map = shader_slot_map;
+   ctx.shader_slots_reserved = *shader_slots_reserved;
    ctx.GLSL_std_450 = spirv_builder_import(&ctx.builder, "GLSL.std.450");
    spirv_builder_emit_source(&ctx.builder, SpvSourceLanguageGLSL, 450);
 
@@ -2329,7 +2265,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
 
 
    if (so_info)
-      emit_so_info(&ctx, util_last_bit64(s->info.outputs_written), so_info);
+      emit_so_info(&ctx, so_info);
    /* we have to reverse iterate to match what's done in zink_compiler.c */
    foreach_list_typed_reverse(nir_variable, var, node, &s->variables)
       if (_nir_shader_variable_has_mode(var, nir_var_uniform |
@@ -2421,6 +2357,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
    assert(ret->num_words == num_words);
 
    ralloc_free(ctx.mem_ctx);
+   *shader_slots_reserved = ctx.shader_slots_reserved;
 
    return ret;
 
diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h
index 77d77add4e4..04543c31915 100644
--- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h
+++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h
@@ -42,7 +42,8 @@ struct nir_shader;
 struct pipe_stream_output_info;
 
 struct spirv_shader *
-nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info);
+nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info,
+             unsigned char *shader_slot_map, unsigned char *shader_slots_reserved);
 
 void
 spirv_shader_delete(struct spirv_shader *s);
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index 87b9a785cef..ad2e6b0588e 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -217,11 +217,12 @@ update_so_info(struct zink_shader *sh,
 }
 
 VkShaderModule
-zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs)
+zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
+                    unsigned char *shader_slot_map, unsigned char *shader_slots_reserved)
 {
    VkShaderModule mod = VK_NULL_HANDLE;
    void *streamout = zs->streamout.so_info_slots ? &zs->streamout : NULL;
-   struct spirv_shader *spirv = nir_to_spirv(zs->nir, streamout);
+   struct spirv_shader *spirv = nir_to_spirv(zs->nir, streamout, shader_slot_map, shader_slots_reserved);
    assert(spirv);
 
    if (zink_debug & ZINK_DEBUG_SPIRV) {
diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h
index 810be0163bd..3a473bdeaa9 100644
--- a/src/gallium/drivers/zink/zink_compiler.h
+++ b/src/gallium/drivers/zink/zink_compiler.h
@@ -72,7 +72,8 @@ struct zink_shader {
 };
 
 VkShaderModule
-zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs);
+zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
+                    unsigned char *shader_slot_map, unsigned char *shader_slots_reserved);
 
 struct zink_shader *
 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c
index 0bcaa4a03e7..f7bd9ca95fb 100644
--- a/src/gallium/drivers/zink/zink_program.c
+++ b/src/gallium/drivers/zink/zink_program.c
@@ -146,13 +146,15 @@ update_shader_modules(struct zink_context *ctx, struct zink_shader *stages[ZINK_
       unsigned type = u_bit_scan(&dirty_shader_stages);
       dirty[tgsi_processor_to_shader_stage(type)] = stages[type];
    }
+
    for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
       enum pipe_shader_type type = pipe_shader_type_from_mesa(i);
       if (dirty[i]) {
          prog->modules[type] = CALLOC_STRUCT(zink_shader_module);
          assert(prog->modules[type]);
          pipe_reference_init(&prog->modules[type]->reference, 1);
-         prog->modules[type]->shader = zink_shader_compile(zink_screen(ctx->base.screen), dirty[i]);
+         prog->modules[type]->shader = zink_shader_compile(zink_screen(ctx->base.screen), dirty[i],
+                                                           prog->shader_slot_map, &prog->shader_slots_reserved);
       } else if (stages[type]) /* reuse existing shader module */
          zink_shader_module_reference(zink_screen(ctx->base.screen), &prog->modules[type], ctx->curr_program->modules[type]);
       prog->shaders[type] = stages[type];
@@ -172,6 +174,28 @@ equals_gfx_pipeline_state(const void *a, const void *b)
    return memcmp(a, b, offsetof(struct zink_gfx_pipeline_state, hash)) == 0;
 }
 
+static void
+init_slot_map(struct zink_context *ctx, struct zink_gfx_program *prog)
+{
+   unsigned existing_shaders = 0;
+
+   /* if there's a case where we'll be reusing any shaders, we need to reuse the slot map too */
+   if (ctx->curr_program) {
+      for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
+          if (ctx->curr_program->shaders[i])
+             existing_shaders |= 1 << i;
+      }
+   }
+   if (ctx->dirty_shader_stages == existing_shaders || !existing_shaders)
+      /* all shaders are being recompiled: new slot map */
+      memset(prog->shader_slot_map, -1, sizeof(prog->shader_slot_map));
+   else {
+      /* at least some shaders are being reused: use existing slot map so locations match up */
+      memcpy(prog->shader_slot_map, ctx->curr_program->shader_slot_map, sizeof(prog->shader_slot_map));
+      prog->shader_slots_reserved = ctx->curr_program->shader_slots_reserved;
+   }
+}
+
 struct zink_gfx_program *
 zink_create_gfx_program(struct zink_context *ctx,
                         struct zink_shader *stages[ZINK_SHADER_COUNT])
@@ -183,6 +207,8 @@ zink_create_gfx_program(struct zink_context *ctx,
 
    pipe_reference_init(&prog->reference, 1);
 
+   init_slot_map(ctx, prog);
+
    update_shader_modules(ctx, stages, prog);
 
    for (int i = 0; i < ARRAY_SIZE(prog->pipelines); ++i) {
diff --git a/src/gallium/drivers/zink/zink_program.h b/src/gallium/drivers/zink/zink_program.h
index 5e68783a2f8..82900c92201 100644
--- a/src/gallium/drivers/zink/zink_program.h
+++ b/src/gallium/drivers/zink/zink_program.h
@@ -26,6 +26,7 @@
 
 #include <vulkan/vulkan.h>
 
+#include "compiler/shader_enums.h"
 #include "pipe/p_state.h"
 #include "util/u_inlines.h"
 
@@ -48,6 +49,8 @@ struct zink_gfx_program {
 
    struct zink_shader_module *modules[ZINK_SHADER_COUNT]; // compute stage doesn't belong here
    struct zink_shader *shaders[ZINK_SHADER_COUNT];
+   unsigned char shader_slot_map[VARYING_SLOT_MAX];
+   unsigned char shader_slots_reserved;
    VkDescriptorSetLayout dsl;
    VkPipelineLayout layout;
    unsigned num_descriptors;

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to