Module: Mesa
Branch: main
Commit: 88db736472b6e9b18d3736a42b038e2a7836b0b4
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=88db736472b6e9b18d3736a42b038e2a7836b0b4

Author: Connor Abbott <cwabbo...@gmail.com>
Date:   Fri Sep 29 19:15:44 2023 +0200

tu: Rework dynamic offset handling

With shader objects, we won't have the pipeline layout available. Today,
dynamic offset descriptors combined with fast-linking and independent
descriptor sets are implemented by using the pipeline layout at
fast-link time, which has pre-computed offsets for each descriptor set.
Without the pipeline layout, that approach won't work. Instead we need
to piece together the sizes of the descriptors in each descriptor set
from the shaders. This is already effectively what we do when we stitch
together the pipeline layout when fast-linking, but we need to make it
work with just the shaders.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25679>

---

 src/freedreno/vulkan/tu_cmd_buffer.cc     | 15 ++++++---
 src/freedreno/vulkan/tu_descriptor_set.cc | 26 ----------------
 src/freedreno/vulkan/tu_descriptor_set.h  |  4 ---
 src/freedreno/vulkan/tu_pipeline.cc       | 39 +++++++++++++++--------
 src/freedreno/vulkan/tu_pipeline.h        |  2 ++
 src/freedreno/vulkan/tu_shader.cc         | 51 ++++++++++++++++++++++++++++---
 src/freedreno/vulkan/tu_shader.h          |  8 +++++
 7 files changed, 95 insertions(+), 50 deletions(-)

diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc
index 7f14d08af31..9bb36cf5c9b 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.cc
+++ b/src/freedreno/vulkan/tu_cmd_buffer.cc
@@ -2475,6 +2475,11 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
    descriptors_state->max_sets_bound =
       MAX2(descriptors_state->max_sets_bound, firstSet + descriptorSetCount);
 
+   unsigned dynamic_offset_offset = 0;
+   for (unsigned i = 0; i < firstSet; i++) {
+      dynamic_offset_offset += layout->set[i].layout->dynamic_offset_size;
+   }
+
    for (unsigned i = 0; i < descriptorSetCount; ++i) {
       unsigned idx = i + firstSet;
       TU_FROM_HANDLE(tu_descriptor_set, set, pDescriptorSets[i]);
@@ -2494,7 +2499,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
 
       uint32_t *src = set->dynamic_descriptors;
       uint32_t *dst = descriptors_state->dynamic_descriptors +
-         layout->set[idx].dynamic_offset_start / 4;
+         dynamic_offset_offset / 4;
       for (unsigned j = 0; j < set->layout->binding_count; j++) {
          struct tu_descriptor_set_binding_layout *binding =
             &set->layout->binding[j];
@@ -2550,15 +2555,17 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
             }
          }
       }
+
+      dynamic_offset_offset += layout->set[idx].layout->dynamic_offset_size;
    }
    assert(dyn_idx == dynamicOffsetCount);
 
-   if (layout->dynamic_offset_size) {
+   if (dynamic_offset_offset) {
       /* allocate and fill out dynamic descriptor set */
       struct tu_cs_memory dynamic_desc_set;
       int reserved_set_idx = cmd->device->physical_device->reserved_set_idx;
       VkResult result = tu_cs_alloc(&cmd->sub_cs,
-                                    layout->dynamic_offset_size / (4 * A6XX_TEX_CONST_DWORDS),
+                                    dynamic_offset_offset / (4 * A6XX_TEX_CONST_DWORDS),
                                     A6XX_TEX_CONST_DWORDS, &dynamic_desc_set);
       if (result != VK_SUCCESS) {
          vk_command_buffer_set_error(&cmd->vk, result);
@@ -2566,7 +2573,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
       }
 
       memcpy(dynamic_desc_set.map, descriptors_state->dynamic_descriptors,
-             layout->dynamic_offset_size);
+             dynamic_offset_offset);
       assert(reserved_set_idx >= 0); /* reserved set must be bound */
       descriptors_state->set_iova[reserved_set_idx] = dynamic_desc_set.iova | BINDLESS_DESCRIPTOR_64B;
       descriptors_state->dynamic_bound = true;
diff --git a/src/freedreno/vulkan/tu_descriptor_set.cc b/src/freedreno/vulkan/tu_descriptor_set.cc
index 3dab17978a1..5d106ed16d2 100644
--- a/src/freedreno/vulkan/tu_descriptor_set.cc
+++ b/src/freedreno/vulkan/tu_descriptor_set.cc
@@ -488,39 +488,15 @@ sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx,
 void
 tu_pipeline_layout_init(struct tu_pipeline_layout *layout)
 {
-   unsigned dynamic_offset_size = 0;
-
-   for (uint32_t set = 0; set < layout->num_sets; set++) {
-      layout->set[set].dynamic_offset_start = dynamic_offset_size;
-
-      if (layout->set[set].layout)
-         dynamic_offset_size += layout->set[set].layout->dynamic_offset_size;
-   }
-
-   layout->dynamic_offset_size = dynamic_offset_size;
-
-   /* We only care about INDEPENDENT_SETS for dynamic-offset descriptors,
-    * where all the descriptors from all the sets are combined into one set
-    * and we have to provide the dynamic_offset_start dynamically with fast
-    * linking.
-    */
-   if (dynamic_offset_size == 0) {
-      layout->independent_sets = false;
-   }
-
    struct mesa_sha1 ctx;
    _mesa_sha1_init(&ctx);
    for (unsigned s = 0; s < layout->num_sets; s++) {
       if (layout->set[s].layout)
          sha1_update_descriptor_set_layout(&ctx, layout->set[s].layout);
-      _mesa_sha1_update(&ctx, &layout->set[s].dynamic_offset_start,
-                        sizeof(layout->set[s].dynamic_offset_start));
    }
    _mesa_sha1_update(&ctx, &layout->num_sets, sizeof(layout->num_sets));
    _mesa_sha1_update(&ctx, &layout->push_constant_size,
                      sizeof(layout->push_constant_size));
-   _mesa_sha1_update(&ctx, &layout->independent_sets,
-                     sizeof(layout->independent_sets));
    _mesa_sha1_final(&ctx, layout->sha1);
 }
 
@@ -562,8 +538,6 @@ tu_CreatePipelineLayout(VkDevice _device,
    }
 
    layout->push_constant_size = align(layout->push_constant_size, 16);
-   layout->independent_sets =
-      pCreateInfo->flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT;
 
    tu_pipeline_layout_init(layout);
 
diff --git a/src/freedreno/vulkan/tu_descriptor_set.h b/src/freedreno/vulkan/tu_descriptor_set.h
index c272b084e06..92d47a953f0 100644
--- a/src/freedreno/vulkan/tu_descriptor_set.h
+++ b/src/freedreno/vulkan/tu_descriptor_set.h
@@ -93,14 +93,10 @@ struct tu_pipeline_layout
    {
       struct tu_descriptor_set_layout *layout;
       uint32_t size;
-      uint32_t dynamic_offset_start;
    } set[MAX_SETS];
 
-   bool independent_sets;
-
    uint32_t num_sets;
    uint32_t push_constant_size;
-   uint32_t dynamic_offset_size;
 
    unsigned char sha1[20];
 };
diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc
index 6817ff5291f..b4080598b7a 100644
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@@ -168,7 +168,7 @@ tu6_emit_load_state(struct tu_device *device,
          case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
             assert(device->physical_device->reserved_set_idx >= 0);
             base = device->physical_device->reserved_set_idx;
-            offset = (layout->set[i].dynamic_offset_start +
+            offset = (pipeline->program.dynamic_descriptor_offsets[i] +
                       binding->dynamic_offset_offset) / 4;
             FALLTHROUGH;
          case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
@@ -205,7 +205,7 @@ tu6_emit_load_state(struct tu_device *device,
          case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
             assert(device->physical_device->reserved_set_idx >= 0);
             base = device->physical_device->reserved_set_idx;
-            offset = (layout->set[i].dynamic_offset_start +
+            offset = (pipeline->program.dynamic_descriptor_offsets[i] +
                       binding->dynamic_offset_offset) / 4;
             FALLTHROUGH;
          case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
@@ -405,7 +405,7 @@ static void
 tu6_emit_dynamic_offset(struct tu_cs *cs,
                         const struct ir3_shader_variant *xs,
                         const struct tu_shader *shader,
-                        struct tu_pipeline_builder *builder)
+                        const struct tu_program_state *program)
 {
    const struct tu_physical_device *phys_dev = cs->device->physical_device;
    if (!xs || shader->const_state.dynamic_offset_loc == UINT32_MAX)
@@ -422,8 +422,8 @@ tu6_emit_dynamic_offset(struct tu_cs *cs,
 
    for (unsigned i = 0; i < phys_dev->usable_sets; i++) {
       unsigned dynamic_offset_start =
-         builder->layout.set[i].dynamic_offset_start / (A6XX_TEX_CONST_DWORDS * 4);
-      tu_cs_emit(cs, i < builder->layout.num_sets ? dynamic_offset_start : 0);
+         program->dynamic_descriptor_offsets[i] / (A6XX_TEX_CONST_DWORDS * 4);
+      tu_cs_emit(cs, dynamic_offset_start);
    }
 }
 
@@ -1151,14 +1151,14 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs,
 template <chip CHIP>
 static void
 tu6_emit_program_config(struct tu_cs *cs,
-                        struct tu_pipeline *pipeline,
-                        struct tu_pipeline_builder *builder,
+                        const struct tu_program_state *prog,
+                        struct tu_shader **shaders,
                         const struct ir3_shader_variant **variants)
 {
    STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
 
    bool shared_consts_enable =
-      pipeline->program.shared_consts.type == IR3_PUSH_CONSTS_SHARED;
+      prog->shared_consts.type == IR3_PUSH_CONSTS_SHARED;
    tu6_emit_shared_consts_enable<CHIP>(cs, shared_consts_enable);
 
    tu_cs_emit_regs(cs, HLSQ_INVALIDATE_CMD(CHIP,
@@ -1178,7 +1178,7 @@ tu6_emit_program_config(struct tu_cs *cs,
    for (size_t stage_idx = MESA_SHADER_VERTEX;
         stage_idx <= MESA_SHADER_FRAGMENT; stage_idx++) {
       gl_shader_stage stage = (gl_shader_stage) stage_idx;
-      tu6_emit_dynamic_offset(cs, variants[stage], pipeline->shaders[stage], builder);
+      tu6_emit_dynamic_offset(cs, variants[stage], shaders[stage], prog);
    }
 
    const struct ir3_shader_variant *vs = variants[MESA_SHADER_VERTEX];
@@ -2245,7 +2245,6 @@ tu_pipeline_builder_parse_layout(struct tu_pipeline_builder *builder,
          }
 
          builder->layout.push_constant_size = library->push_constant_size;
-         builder->layout.independent_sets |= library->independent_sets;
       }
 
       tu_pipeline_layout_init(&builder->layout);
@@ -2261,7 +2260,6 @@ tu_pipeline_builder_parse_layout(struct tu_pipeline_builder *builder,
             vk_descriptor_set_layout_ref(&library->layouts[i]->vk);
       }
       library->push_constant_size = builder->layout.push_constant_size;
-      library->independent_sets = builder->layout.independent_sets;
    }
 }
 
@@ -2294,6 +2292,8 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
    uint32_t safe_variants =
       ir3_trim_constlen(variants, builder->device->compiler);
 
+   unsigned dynamic_descriptor_sizes[MAX_SETS] = { };
+
    for (gl_shader_stage stage = MESA_SHADER_VERTEX;
         stage < ARRAY_SIZE(variants); stage = (gl_shader_stage) (stage+1)) {
       if (pipeline->shaders[stage]) {
@@ -2303,6 +2303,13 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
          } else {
             draw_states[stage] = pipeline->shaders[stage]->state;
          }
+
+         for (unsigned i = 0; i < MAX_SETS; i++) {
+            if (pipeline->shaders[stage]->dynamic_descriptor_sizes[i] >= 0) {
+               dynamic_descriptor_sizes[i] =
+                  pipeline->shaders[stage]->dynamic_descriptor_sizes[i];
+            }
+         }
       }
    }
 
@@ -2322,6 +2329,13 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
       }
    }
 
+   unsigned dynamic_descriptor_offset = 0;
+   for (unsigned i = 0; i < MAX_SETS; i++) {
+      pipeline->program.dynamic_descriptor_offsets[i] =
+         dynamic_descriptor_offset;
+      dynamic_descriptor_offset += dynamic_descriptor_sizes[i];
+   }
+
    /* Emit HLSQ_xS_CNTL/HLSQ_SP_xS_CONFIG *first*, before emitting anything
     * else that could depend on that state (like push constants)
     *
@@ -2334,7 +2348,8 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
     * and draw passes.
     */
    tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
-   tu6_emit_program_config<CHIP>(&prog_cs, pipeline, builder, variants);
+   tu6_emit_program_config<CHIP>(&prog_cs, &pipeline->program,
+                                 pipeline->shaders, variants);
    pipeline->program.config_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
 
    pipeline->program.vs_state = draw_states[MESA_SHADER_VERTEX];
diff --git a/src/freedreno/vulkan/tu_pipeline.h 
b/src/freedreno/vulkan/tu_pipeline.h
index a96e2cd4fea..523d025e6fe 100644
--- a/src/freedreno/vulkan/tu_pipeline.h
+++ b/src/freedreno/vulkan/tu_pipeline.h
@@ -99,6 +99,8 @@ struct tu_program_state
 
       struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES];
 
+      unsigned dynamic_descriptor_offsets[MAX_SETS];
+
       bool per_view_viewport;
 };
 
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index 2179a64f8e9..fd4118a04a8 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -188,8 +188,18 @@ lower_vulkan_resource_index(struct tu_device *dev, nir_builder *b,
 
    switch (binding_layout->type) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
-   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-      if (layout->independent_sets) {
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+      int offset = 0;
+      for (unsigned i = 0; i < set; i++) {
+         if (shader->dynamic_descriptor_sizes[i] >= 0) {
+            offset += shader->dynamic_descriptor_sizes[i];
+         } else {
+            offset = -1;
+            break;
+         }
+      }
+
+      if (offset < 0) {
          /* With independent sets, we don't know
           * layout->set[set].dynamic_offset_start until after link time which
           * with fast linking means after the shader is compiled. We have to
@@ -201,12 +211,13 @@ lower_vulkan_resource_index(struct tu_device *dev, 
nir_builder *b,
                              .base = shader->const_state.dynamic_offset_loc + set);
          base = nir_iadd(b, base, dynamic_offset_start);
       } else {
-         base = nir_imm_int(b, (layout->set[set].dynamic_offset_start +
+         base = nir_imm_int(b, (offset +
             binding_layout->dynamic_offset_offset) / (4 * A6XX_TEX_CONST_DWORDS));
       }
       assert(dev->physical_device->reserved_set_idx >= 0);
       set = dev->physical_device->reserved_set_idx;
       break;
+   }
    default:
       base = nir_imm_int(b, binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS));
       break;
@@ -749,7 +760,21 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
       align(DIV_ROUND_UP(const_state->push_consts.dwords, 4),
             dev->compiler->const_upload_unit);
 
-   if (layout->independent_sets) {
+   bool unknown_dynamic_size = false;
+   bool unknown_dynamic_offset = false;
+   for (unsigned i = 0; i < layout->num_sets; i++) {
+      if (tu_shader->dynamic_descriptor_sizes[i] == -1) {
+         unknown_dynamic_size = true;
+      } else if (unknown_dynamic_size &&
+                 tu_shader->dynamic_descriptor_sizes[i] > 0) {
+         /* If there is an unknown size followed by a known size, then we may
+          * need to dynamically determine the offset when linking.
+          */
+         unknown_dynamic_offset = true;
+      }
+   }
+
+   if (unknown_dynamic_offset) {
       const_state->dynamic_offset_loc = reserved_consts_vec4 * 4;
       assert(dev->physical_device->reserved_set_idx >= 0);
       reserved_consts_vec4 += DIV_ROUND_UP(dev->physical_device->reserved_set_idx, 4);
@@ -2121,6 +2146,8 @@ tu_shader_serialize(struct vk_pipeline_cache_object *object,
       container_of(object, struct tu_shader, base);
 
    blob_write_bytes(blob, &shader->const_state, sizeof(shader->const_state));
+   blob_write_bytes(blob, &shader->dynamic_descriptor_sizes,
+                    sizeof(shader->dynamic_descriptor_sizes));
    blob_write_uint32(blob, shader->view_mask);
    blob_write_uint8(blob, shader->active_desc_sets);
 
@@ -2133,6 +2160,8 @@ tu_shader_serialize(struct vk_pipeline_cache_object 
*object,
       blob_write_uint8(blob, 0);
    }
 
+
+
    switch (shader->variant->type) {
    case MESA_SHADER_TESS_EVAL:
       blob_write_bytes(blob, &shader->tes, sizeof(shader->tes));
@@ -2162,6 +2191,8 @@ tu_shader_deserialize(struct vk_pipeline_cache *cache,
       return NULL;
 
    blob_copy_bytes(blob, &shader->const_state, sizeof(shader->const_state));
+   blob_copy_bytes(blob, &shader->dynamic_descriptor_sizes,
+                   sizeof(shader->dynamic_descriptor_sizes));
    shader->view_mask = blob_read_uint32(blob);
    shader->active_desc_sets = blob_read_uint8(blob);
 
@@ -2305,6 +2336,15 @@ tu_shader_create(struct tu_device *dev,
          nir->info.stage == MESA_SHADER_GEOMETRY)
       tu_gather_xfb_info(nir, &so_info);
 
+   for (unsigned i = 0; i < layout->num_sets; i++) {
+      if (layout->set[i].layout) {
+         shader->dynamic_descriptor_sizes[i] =
+            layout->set[i].layout->dynamic_offset_size;
+      } else {
+         shader->dynamic_descriptor_sizes[i] = -1;
+      }
+   }
+
    unsigned reserved_consts_vec4 = 0;
    NIR_PASS_V(nir, tu_lower_io, dev, shader, layout, &reserved_consts_vec4);
 
@@ -2464,6 +2504,9 @@ tu_empty_fs_create(struct tu_device *dev, struct tu_shader **shader,
    if (fragment_density_map)
       (*shader)->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ;
 
+   for (unsigned i = 0; i < MAX_SETS; i++)
+      (*shader)->dynamic_descriptor_sizes[i] = -1;
+
    struct ir3_shader *ir3_shader =
       ir3_shader_from_nir(dev->compiler, fs_b.shader, &options, &so_info);
    (*shader)->variant = ir3_shader_create_variant(ir3_shader, &key, false);
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index 1d6ec2e4af8..eed38243e9d 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -13,6 +13,7 @@
 #include "tu_common.h"
 #include "tu_cs.h"
 #include "tu_suballoc.h"
+#include "tu_descriptor_set.h"
 
 struct tu_inline_ubo
 {
@@ -69,6 +70,13 @@ struct tu_shader
    uint32_t view_mask;
    uint8_t active_desc_sets;
 
+   /* The dynamic buffer descriptor size for descriptor sets that we know
+    * about. This is used when linking to piece together the sizes and from
+    * there calculate the offsets. It's -1 if we don't know because the
+    * descriptor set layout is NULL.
+    */
+   int dynamic_descriptor_sizes[MAX_SETS];
+
    union {
       struct {
          unsigned patch_type;

Reply via email to