Module: Mesa
Branch: main
Commit: 1542f3eb470ffefe4b0b30c0547eb72f4fca712c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1542f3eb470ffefe4b0b30c0547eb72f4fca712c

Author: Mike Blumenkrantz <[email protected]>
Date:   Tue Aug 24 16:01:56 2021 -0400

zink: decompose vertex attribs into single components when not supported

this avoids vbuf in a lot more cases on radv where 3-component attribs aren't
supported

Reviewed-by: Dave Airlie <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12771>

---

 src/gallium/drivers/zink/zink_compiler.c    | 89 +++++++++++++++++++++++++++--
 src/gallium/drivers/zink/zink_pipeline.h    |  2 +
 src/gallium/drivers/zink/zink_program.c     | 25 +++++---
 src/gallium/drivers/zink/zink_screen.c      | 12 +++-
 src/gallium/drivers/zink/zink_shader_keys.h | 16 +++++-
 src/gallium/drivers/zink/zink_state.c       | 70 +++++++++++++++++++++--
 src/gallium/drivers/zink/zink_state.h       |  3 +
 7 files changed, 196 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/zink/zink_compiler.c 
b/src/gallium/drivers/zink/zink_compiler.c
index 3b02d29d8c2..17d5f9d29f5 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -564,6 +564,79 @@ update_so_info(struct zink_shader *zs, const struct 
pipe_stream_output_info *so_
    zs->streamout.have_xfb = !!zs->streamout.so_info.num_outputs;
 }
 
+struct decompose_state {
+  nir_variable **split;
+  bool needs_w;
+};
+
+static bool
+lower_attrib(nir_builder *b, nir_instr *instr, void *data)
+{
+   struct decompose_state *state = data;
+   nir_variable **split = state->split;
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_load_deref)
+      return false;
+   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   if (var != split[0])
+      return false;
+   unsigned num_components = glsl_get_vector_elements(split[0]->type);
+   b->cursor = nir_after_instr(instr);
+   nir_ssa_def *loads[4];
+   for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : 
num_components); i++)
+      loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
+   if (state->needs_w) {
+      /* oob load w component to get correct value for int/float */
+      loads[3] = nir_channel(b, loads[0], 3);
+      loads[0] = nir_channel(b, loads[0], 0);
+   }
+   nir_ssa_def *new_load = nir_vec(b, loads, num_components);
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
+   nir_instr_remove_v(instr);
+   return true;
+}
+
+static bool
+decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t 
decomposed_attrs_without_w)
+{
+   uint32_t bits = 0;
+   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
+      bits |= BITFIELD_BIT(var->data.driver_location);
+   bits = ~bits;
+   u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
+      nir_variable *split[5];
+      struct decompose_state state;
+      state.split = split;
+      nir_variable *var = nir_find_variable_with_driver_location(nir, 
nir_var_shader_in, location);
+      assert(var);
+      split[0] = var;
+      bits |= BITFIELD_BIT(var->data.driver_location);
+      const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? 
var->type : glsl_get_array_element(var->type);
+      unsigned num_components = glsl_get_vector_elements(var->type);
+      state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 
0 && num_components == 4;
+      for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : 
num_components); i++) {
+         split[i+1] = nir_variable_clone(var, nir);
+         split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
+         if (decomposed_attrs_without_w & BITFIELD_BIT(location))
+            split[i+1]->type = !i && num_components == 4 ? var->type : 
new_type;
+         else
+            split[i+1]->type = new_type;
+         split[i+1]->data.driver_location = ffs(bits) - 1;
+         bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
+         nir_shader_add_variable(nir, split[i+1]);
+      }
+      var->data.mode = nir_var_shader_temp;
+      nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, 
&state);
+   }
+   nir_fixup_deref_modes(nir);
+   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
+   optimize_nir(nir);
+   return true;
+}
+
 static void
 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned 
*reserved, unsigned char *slot_map)
 {
@@ -731,17 +804,25 @@ zink_shader_compile(struct zink_screen *screen, struct 
zink_shader *zs, nir_shad
 
       /* TODO: use a separate mem ctx here for ralloc */
       switch (zs->nir->info.stage) {
-      case MESA_SHADER_VERTEX:
+      case MESA_SHADER_VERTEX: {
+         uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
+         const struct zink_vs_key *vs_key = zink_vs_key(key);
+         decomposed_attrs = vs_key->decomposed_attrs;
+         decomposed_attrs_without_w = vs_key->decomposed_attrs_without_w;
+         if (decomposed_attrs || decomposed_attrs_without_w)
+            NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, 
decomposed_attrs_without_w);
+         FALLTHROUGH;
+      }
       case MESA_SHADER_TESS_EVAL:
       case MESA_SHADER_GEOMETRY:
-         if (zink_vs_key(key)->last_vertex_stage) {
+         if (zink_vs_key_base(key)->last_vertex_stage) {
             if (zs->streamout.have_xfb)
                streamout = &zs->streamout;
 
-            if (!zink_vs_key(key)->clip_halfz) {
+            if (!zink_vs_key_base(key)->clip_halfz) {
                NIR_PASS_V(nir, nir_lower_clip_halfz);
             }
-            if (zink_vs_key(key)->push_drawid) {
+            if (zink_vs_key_base(key)->push_drawid) {
                NIR_PASS_V(nir, lower_drawid);
             }
          }
diff --git a/src/gallium/drivers/zink/zink_pipeline.h 
b/src/gallium/drivers/zink/zink_pipeline.h
index d4239b098c9..eccfa443c28 100644
--- a/src/gallium/drivers/zink/zink_pipeline.h
+++ b/src/gallium/drivers/zink/zink_pipeline.h
@@ -78,6 +78,8 @@ struct zink_gfx_pipeline_state {
    uint8_t coord_replace_bits;
    bool coord_replace_yinvert;
    bool drawid_broken;
+   uint32_t decomposed_attrs;
+   uint32_t decomposed_attrs_without_w;
    struct zink_blend_state *blend_state;
    struct zink_render_pass *render_pass;
    VkPipeline pipeline;
diff --git a/src/gallium/drivers/zink/zink_program.c 
b/src/gallium/drivers/zink/zink_program.c
index d0121762071..fd569f449d1 100644
--- a/src/gallium/drivers/zink/zink_program.c
+++ b/src/gallium/drivers/zink/zink_program.c
@@ -105,11 +105,11 @@ keybox_equals(const void *void_a, const void *void_b)
 }
 
 static void
-shader_key_vs_gen(struct zink_context *ctx, struct zink_shader *zs,
-                  struct zink_shader *shaders[ZINK_SHADER_COUNT], struct 
zink_shader_key *key)
+shader_key_vs_base_gen(struct zink_context *ctx, struct zink_shader *zs,
+                       struct zink_shader *shaders[ZINK_SHADER_COUNT], struct 
zink_shader_key *key)
 {
-   struct zink_vs_key *vs_key = &key->key.vs;
-   key->size = sizeof(struct zink_vs_key);
+   struct zink_vs_key_base *vs_key = &key->key.vs_base;
+   key->size = sizeof(struct zink_vs_key_base);
 
    vs_key->clip_halfz = ctx->rast_state && ctx->rast_state->base.clip_halfz;
    switch (zs->nir->info.stage) {
@@ -128,6 +128,17 @@ shader_key_vs_gen(struct zink_context *ctx, struct 
zink_shader *zs,
    }
 }
 
+static void
+shader_key_vs_gen(struct zink_context *ctx, struct zink_shader *zs,
+                       struct zink_shader *shaders[ZINK_SHADER_COUNT], struct 
zink_shader_key *key)
+{
+   struct zink_vs_key *vs_key = &key->key.vs;
+   shader_key_vs_base_gen(ctx, zs, shaders, key);
+   vs_key->decomposed_attrs = ctx->element_state->decomposed_attrs;
+   vs_key->decomposed_attrs_without_w = 
ctx->element_state->decomposed_attrs_without_w;
+   key->size += 2 * 4;
+}
+
 static void
 shader_key_fs_gen(struct zink_context *ctx, struct zink_shader *zs,
                   struct zink_shader *shaders[ZINK_SHADER_COUNT], struct 
zink_shader_key *key)
@@ -167,8 +178,8 @@ static zink_shader_key_gen shader_key_vtbl[] =
    [MESA_SHADER_VERTEX] = shader_key_vs_gen,
    [MESA_SHADER_TESS_CTRL] = shader_key_tcs_gen,
    /* reusing vs key for now since we're only using clip_halfz */
-   [MESA_SHADER_TESS_EVAL] = shader_key_vs_gen,
-   [MESA_SHADER_GEOMETRY] = shader_key_vs_gen,
+   [MESA_SHADER_TESS_EVAL] = shader_key_vs_base_gen,
+   [MESA_SHADER_GEOMETRY] = shader_key_vs_base_gen,
    [MESA_SHADER_FRAGMENT] = shader_key_fs_gen,
 };
 
@@ -179,7 +190,7 @@ get_default_shader_module_ptr(struct zink_gfx_program 
*prog, struct zink_shader
    if (zs->nir->info.stage == MESA_SHADER_VERTEX ||
        zs->nir->info.stage == MESA_SHADER_TESS_EVAL) {
       /* no streamout or halfz */
-      if (!zink_vs_key(key)->last_vertex_stage)
+      if (!zink_vs_key_base(key)->last_vertex_stage)
          return &prog->default_variants[zs->nir->info.stage][1];
    }
    return &prog->default_variants[zs->nir->info.stage][0];
diff --git a/src/gallium/drivers/zink/zink_screen.c 
b/src/gallium/drivers/zink/zink_screen.c
index a08010f9e3c..184ff083678 100644
--- a/src/gallium/drivers/zink/zink_screen.c
+++ b/src/gallium/drivers/zink/zink_screen.c
@@ -994,9 +994,15 @@ zink_is_format_supported(struct pipe_screen *pscreen,
    VkFormatProperties props = screen->format_props[format];
 
    if (target == PIPE_BUFFER) {
-      if (bind & PIPE_BIND_VERTEX_BUFFER &&
-          !(props.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT))
-         return false;
+      if (bind & PIPE_BIND_VERTEX_BUFFER) {
+         if (!(props.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT)) {
+            enum pipe_format new_format = zink_decompose_vertex_format(format);
+            if (!new_format)
+               return false;
+            if (!(screen->format_props[new_format].bufferFeatures & 
VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT))
+               return false;
+         }
+      }
 
       if (bind & PIPE_BIND_SAMPLER_VIEW &&
          !(props.bufferFeatures & VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT))
diff --git a/src/gallium/drivers/zink/zink_shader_keys.h 
b/src/gallium/drivers/zink/zink_shader_keys.h
index f0dc1447e46..61ad37f542f 100644
--- a/src/gallium/drivers/zink/zink_shader_keys.h
+++ b/src/gallium/drivers/zink/zink_shader_keys.h
@@ -26,12 +26,19 @@
 #ifndef ZINK_SHADER_KEYS_H
 # define ZINK_SHADER_KEYS_H
 
-struct zink_vs_key {
+struct zink_vs_key_base {
    bool clip_halfz;
    bool push_drawid;
    bool last_vertex_stage;
 };
 
+struct zink_vs_key {
+   struct zink_vs_key_base base;
+   uint8_t pad;
+   uint32_t decomposed_attrs;
+   uint32_t decomposed_attrs_without_w;
+};
+
 struct zink_fs_key {
    uint8_t coord_replace_bits;
    bool coord_replace_yinvert;
@@ -52,6 +59,7 @@ struct zink_shader_key {
    union {
       /* reuse vs key for now with tes/gs since we only use clip_halfz */
       struct zink_vs_key vs;
+      struct zink_vs_key_base vs_base;
       struct zink_fs_key fs;
    } key;
    struct zink_shader_key_base base;
@@ -67,6 +75,12 @@ zink_fs_key(const struct zink_shader_key *key)
    return &key->key.fs;
 }
 
+static inline const struct zink_vs_key_base *
+zink_vs_key_base(const struct zink_shader_key *key)
+{
+   return &key->key.vs_base;
+}
+
 static inline const struct zink_vs_key *
 zink_vs_key(const struct zink_shader_key *key)
 {
diff --git a/src/gallium/drivers/zink/zink_state.c 
b/src/gallium/drivers/zink/zink_state.c
index e36197c1d89..9810f5c1f8e 100644
--- a/src/gallium/drivers/zink/zink_state.c
+++ b/src/gallium/drivers/zink/zink_state.c
@@ -24,6 +24,7 @@
 #include "zink_state.h"
 
 #include "zink_context.h"
+#include "zink_format.h"
 #include "zink_screen.h"
 
 #include "compiler/shader_enums.h"
@@ -49,6 +50,10 @@ zink_create_vertex_elements_state(struct pipe_context *pctx,
       buffer_map[i] = -1;
 
    int num_bindings = 0;
+   unsigned num_decomposed = 0;
+   uint32_t size8 = 0;
+   uint32_t size16 = 0;
+   uint32_t size32 = 0;
    for (i = 0; i < num_elements; ++i) {
       const struct pipe_vertex_element *elem = elements + i;
 
@@ -59,7 +64,6 @@ zink_create_vertex_elements_state(struct pipe_context *pctx,
       }
       binding = buffer_map[binding];
 
-
       ves->bindings[binding].binding = binding;
       ves->bindings[binding].inputRate = elem->instance_divisor ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
 
@@ -68,24 +72,73 @@ zink_create_vertex_elements_state(struct pipe_context *pctx,
          debug_printf("zink: clamping instance divisor %u to %u\n", 
elem->instance_divisor, screen->info.vdiv_props.maxVertexAttribDivisor);
       ves->divisor[binding] = MIN2(elem->instance_divisor, 
screen->info.vdiv_props.maxVertexAttribDivisor);
 
+      VkFormat format;
+      if (screen->format_props[elem->src_format].bufferFeatures & 
VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT)
+         format = zink_get_format(screen, elem->src_format);
+      else {
+         enum pipe_format new_format = 
zink_decompose_vertex_format(elem->src_format);
+         assert(new_format);
+         num_decomposed++;
+         assert(screen->format_props[new_format].bufferFeatures & 
VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT);
+         if (util_format_get_blocksize(new_format) == 4)
+            size32 |= BITFIELD_BIT(i);
+         else if (util_format_get_blocksize(new_format) == 2)
+            size16 |= BITFIELD_BIT(i);
+         else
+            size8 |= BITFIELD_BIT(i);
+         format = zink_get_format(screen, new_format);
+         unsigned size;
+         if (i < 8)
+            size = 1;
+         else if (i < 16)
+            size = 2;
+         else
+            size = 4;
+         if (util_format_get_nr_components(elem->src_format) == 4) {
+            ves->decomposed_attrs |= BITFIELD_BIT(i);
+            ves->decomposed_attrs_size = size;
+         } else {
+            ves->decomposed_attrs_without_w |= BITFIELD_BIT(i);
+         }
+      }
+
       if (screen->info.have_EXT_vertex_input_dynamic_state) {
          ves->hw_state.dynattribs[i].sType = 
VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT;
          ves->hw_state.dynattribs[i].binding = binding;
          ves->hw_state.dynattribs[i].location = i;
-         ves->hw_state.dynattribs[i].format = zink_get_format(screen,
-                                                           elem->src_format);
+         ves->hw_state.dynattribs[i].format = format;
          assert(ves->hw_state.dynattribs[i].format != VK_FORMAT_UNDEFINED);
          ves->hw_state.dynattribs[i].offset = elem->src_offset;
       } else {
          ves->hw_state.attribs[i].binding = binding;
          ves->hw_state.attribs[i].location = i;
-         ves->hw_state.attribs[i].format = zink_get_format(screen,
-                                                           elem->src_format);
+         ves->hw_state.attribs[i].format = format;
          assert(ves->hw_state.attribs[i].format != VK_FORMAT_UNDEFINED);
          ves->hw_state.attribs[i].offset = elem->src_offset;
       }
    }
-
+   assert(num_decomposed + num_elements <= PIPE_MAX_ATTRIBS);
+   u_foreach_bit(i, ves->decomposed_attrs | ves->decomposed_attrs_without_w) {
+      const struct pipe_vertex_element *elem = elements + i;
+      const struct util_format_description *desc = 
util_format_description(elem->src_format);
+      unsigned size = 1;
+      if (size32 & BITFIELD_BIT(i))
+         size = 4;
+      else if (size16 & BITFIELD_BIT(i))
+         size = 2;
+      for (unsigned j = 1; j < desc->nr_channels; j++) {
+         if (screen->info.have_EXT_vertex_input_dynamic_state) {
+            memcpy(&ves->hw_state.dynattribs[num_elements], 
&ves->hw_state.dynattribs[i], sizeof(VkVertexInputAttributeDescription2EXT));
+            ves->hw_state.dynattribs[num_elements].location = num_elements;
+            ves->hw_state.dynattribs[num_elements].offset += j * size;
+         } else {
+            memcpy(&ves->hw_state.attribs[num_elements], 
&ves->hw_state.attribs[i], sizeof(VkVertexInputAttributeDescription));
+            ves->hw_state.attribs[num_elements].location = num_elements;
+            ves->hw_state.attribs[num_elements].offset += j * size;
+         }
+         num_elements++;
+      }
+   }
    ves->hw_state.num_bindings = num_bindings;
    ves->hw_state.num_attribs = num_elements;
    if (screen->info.have_EXT_vertex_input_dynamic_state) {
@@ -124,6 +177,11 @@ zink_bind_vertex_elements_state(struct pipe_context *pctx,
          ctx->vertex_state_changed = 
!zink_screen(pctx->screen)->info.have_EXT_vertex_input_dynamic_state;
          ctx->vertex_buffers_dirty = ctx->element_state->hw_state.num_bindings 
> 0;
       }
+      if (ctx->element_state->decomposed_attrs != state->decomposed_attrs ||
+          ctx->element_state->decomposed_attrs_without_w != 
state->decomposed_attrs_without_w)
+         ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_VERTEX);
+      state->decomposed_attrs = ctx->element_state->decomposed_attrs;
+      state->decomposed_attrs_without_w = 
ctx->element_state->decomposed_attrs_without_w;
       state->element_state = &ctx->element_state->hw_state;
    } else {
      state->element_state = NULL;
diff --git a/src/gallium/drivers/zink/zink_state.h 
b/src/gallium/drivers/zink/zink_state.h
index e9c33c1c917..9efcadf4a4c 100644
--- a/src/gallium/drivers/zink/zink_state.h
+++ b/src/gallium/drivers/zink/zink_state.h
@@ -52,6 +52,9 @@ struct zink_vertex_elements_state {
    } bindings[PIPE_MAX_ATTRIBS];
    uint32_t divisor[PIPE_MAX_ATTRIBS];
    uint8_t binding_map[PIPE_MAX_ATTRIBS];
+   uint32_t decomposed_attrs;
+   unsigned decomposed_attrs_size;
+   uint32_t decomposed_attrs_without_w;
    struct zink_vertex_elements_hw_state hw_state;
 };
 

Reply via email to