On 9/17/18 1:43 AM, Lionel Landwerlin wrote:
On 16/09/2018 21:57, Bas Nieuwenhuizen wrote:
On Tue, Sep 11, 2018 at 10:23 PM Lionel Landwerlin
<lionel.g.landwer...@intel.com> wrote:
This new extension adds an implicitly allocated block of uniforms into
the descriptors sets through a new descriptor type.

We implement this by having a single BO in the descriptor set pool
from which we source uniforms.

Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
---
  src/intel/vulkan/anv_cmd_buffer.c             |   3 +
  src/intel/vulkan/anv_descriptor_set.c         | 238 +++++++++++++++++-
  src/intel/vulkan/anv_device.c                 |  22 ++
  src/intel/vulkan/anv_extensions.py            |   1 +
  .../vulkan/anv_nir_apply_pipeline_layout.c    |  52 ++++
  src/intel/vulkan/anv_private.h                |  33 +++
  src/intel/vulkan/genX_cmd_buffer.c            |  32 ++-
  7 files changed, 367 insertions(+), 14 deletions(-)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 8ef71b0ed9c..b14be94f470 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -651,6 +651,7 @@ anv_isl_format_for_descriptor_type(VkDescriptorType type)
     switch (type) {
     case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
     case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
        return ISL_FORMAT_R32G32B32A32_FLOAT;

     case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
@@ -1039,6 +1040,8 @@ void anv_CmdPushDescriptorSetKHR(
           }
           break;

+      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+         unreachable("Invalid descriptor type for push descriptors");
        default:
           break;
        }
diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c
index 3439f828900..2e5f2a1f288 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -26,8 +26,10 @@
  #include <string.h>
  #include <unistd.h>
  #include <fcntl.h>
+#include <sys/mman.h>

  #include "util/mesa-sha1.h"
+#include "vk_util.h"

  #include "anv_private.h"

@@ -40,7 +42,8 @@ void anv_GetDescriptorSetLayoutSupport(
      const VkDescriptorSetLayoutCreateInfo*      pCreateInfo,
      VkDescriptorSetLayoutSupport*               pSupport)
  {
-   uint32_t surface_count[MESA_SHADER_STAGES] = { 0, };
+   int16_t surface_count[MESA_SHADER_STAGES] = { 0, };
+   int16_t inline_surface_indexes[MESA_SHADER_STAGES] = { -1, };

     for (uint32_t b = 0; b < pCreateInfo->bindingCount; b++) {
        const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[b];
@@ -50,6 +53,15 @@ void anv_GetDescriptorSetLayoutSupport(
           /* There is no real limit on samplers */
           break;

+      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+         anv_foreach_stage(s, binding->stageFlags) {
+            if (inline_surface_indexes[s] < 0) {
+               inline_surface_indexes[s] = surface_count[s];
+               surface_count[s] += 1;
+            }
+         }
+         break;
+
        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
           if (binding->pImmutableSamplers) {
              for (uint32_t i = 0; i < binding->descriptorCount; i++) {
@@ -118,6 +130,9 @@ VkResult anv_CreateDescriptorSetLayout(
     memset(set_layout, 0, sizeof(*set_layout));
     set_layout->ref_cnt = 1;
     set_layout->binding_count = max_binding + 1;
+   set_layout->inline_blocks_descriptor_index = -1;
+   memset(set_layout->inline_blocks_surface_indexes,
+          -1, sizeof(set_layout->inline_blocks_surface_indexes));

     for (uint32_t b = 0; b <= max_binding; b++) {
        /* Initialize all binding_layout entries to -1 */
@@ -159,9 +174,24 @@ VkResult anv_CreateDescriptorSetLayout(
  #ifndef NDEBUG
        set_layout->binding[b].type = binding->descriptorType;
  #endif
-      set_layout->binding[b].array_size = binding->descriptorCount;
-      set_layout->binding[b].descriptor_index = set_layout->size;
-      set_layout->size += binding->descriptorCount;
+
+      if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+         /* We only need a single descriptor entry for all the inline uniforms. */
+         set_layout->binding[b].array_size = 1;
+         if (set_layout->inline_blocks_descriptor_index < 0) {
+            set_layout->binding[b].descriptor_index =
+               set_layout->inline_blocks_descriptor_index =
+               set_layout->size;
+            set_layout->size += 1;
+         } else {
+            set_layout->binding[b].descriptor_index =
+               set_layout->inline_blocks_descriptor_index;
+         }
+      } else {
+         set_layout->binding[b].array_size = binding->descriptorCount;
+         set_layout->binding[b].descriptor_index = set_layout->size;
+         set_layout->size += binding->descriptorCount;
+      }

        switch (binding->descriptorType) {
        case VK_DESCRIPTOR_TYPE_SAMPLER:
@@ -176,6 +206,24 @@ VkResult anv_CreateDescriptorSetLayout(
        }

        switch (binding->descriptorType) {
+      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+         set_layout->binding[b].inline_block_offset = set_layout->inline_blocks_size;
+         set_layout->binding[b].inline_block_length = binding->descriptorCount;
+         set_layout->inline_blocks_size += binding->descriptorCount;
+
+         anv_foreach_stage(s, binding->stageFlags) {
+            if (set_layout->inline_blocks_surface_indexes[s] < 0) {
+               set_layout->binding[b].stage[s].surface_index =
+                  set_layout->inline_blocks_surface_indexes[s] =
+                  surface_count[s];
+               surface_count[s] += 1;
+            } else {
+               set_layout->binding[b].stage[s].surface_index =
+                  set_layout->inline_blocks_surface_indexes[s];
+            }
+         }
+         break;
+
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -235,6 +283,12 @@ VkResult anv_CreateDescriptorSetLayout(
        set_layout->shader_stages |= binding->stageFlags;
     }

+   /* Align inline uniforms total size to 32 because we source the allocation
+    * from a single BO in the descriptor set pool and we want the alignment to
+    * match the push constant alignment constraint.
+    */
+   set_layout->inline_blocks_size = ALIGN(set_layout->inline_blocks_size, 32);
+
     set_layout->buffer_count = buffer_count;
     set_layout->dynamic_offset_count = dynamic_offset_count;

@@ -405,21 +459,45 @@ VkResult anv_CreateDescriptorPool(
     ANV_FROM_HANDLE(anv_device, device, _device);
     struct anv_descriptor_pool *pool;

+   vk_foreach_struct(ext, pCreateInfo->pNext) {
+      switch (ext->sType) {
+      case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT:
+         /* Our driver doesn't need to know about this as we use a single
+          * binding table entry per stage if one or more inline descriptor
+          * blocks are used.
+          */
+         break;
+
+      default:
+         anv_debug_ignored_stype(ext->sType);
+         break;
+      }
+   }
+
     uint32_t descriptor_count = 0;
     uint32_t buffer_count = 0;
+   uint32_t inline_blocks_size = 0;
     for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) {
        switch (pCreateInfo->pPoolSizes[i].type) {
+      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+         inline_blocks_size += pCreateInfo->pPoolSizes[i].descriptorCount;
+         break;
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
           buffer_count += pCreateInfo->pPoolSizes[i].descriptorCount;
+         /* Fallthrough */
        default:
           descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount;
           break;
        }
     }

+   /* We'll need one more descriptor for inline uniforms. */
+   if (inline_blocks_size > 0)
+      descriptor_count += MIN2(pCreateInfo->maxSets, inline_blocks_size / 4);
+
     const size_t pool_size =
        pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
        descriptor_count * sizeof(struct anv_descriptor) +
@@ -435,6 +513,34 @@ VkResult anv_CreateDescriptorPool(
     pool->next_set = 0;
     pool->free_set_list = EMPTY;

+   pool->inline_blocks_size = inline_blocks_size;
+   pool->next_block = 0;
+   pool->free_block_list = EMPTY;
+
+   pool->inline_blocks_bo = NULL;
+   if (pool->inline_blocks_size > 0) {
+      struct anv_physical_device *pdevice = &device->instance->physicalDevice;
+      uint64_t bo_flags =
+         (pdevice->supports_48bit_addresses ? EXEC_OBJECT_SUPPORTS_48B_ADDRESS : 0) |
+         (pdevice->has_exec_async ? EXEC_OBJECT_ASYNC : 0) |
+         (pdevice->use_softpin ? EXEC_OBJECT_PINNED : 0);
+
+      VkResult result = anv_bo_cache_alloc(device, &device->bo_cache,
+                                           pool->inline_blocks_size,
+                                           bo_flags,
+                                           &pool->inline_blocks_bo);
+      if (result != VK_SUCCESS)
+         goto fail_iubo_alloc;
+
+      uint32_t gem_flags = !device->info.has_llc ? I915_MMAP_WC : 0;
+      void *map = anv_gem_mmap(device, pool->inline_blocks_bo->gem_handle,
+                               0, pool->inline_blocks_bo->size, gem_flags);
+      if (map == MAP_FAILED)
+         goto fail_iubo_mmap;
+
+      pool->inline_blocks_bo->map = map;
+   }
+
     anv_state_stream_init(&pool->surface_state_stream,
                           &device->surface_state_pool, 4096);
     pool->surface_state_free_list = NULL;
@@ -442,6 +548,12 @@ VkResult anv_CreateDescriptorPool(
     *pDescriptorPool = anv_descriptor_pool_to_handle(pool);

     return VK_SUCCESS;
+
+fail_iubo_mmap:
+   anv_bo_cache_release(device, &device->bo_cache, pool->inline_blocks_bo);
+fail_iubo_alloc:
+   vk_free2(&device->alloc, pAllocator, pool);
+   return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  }

  void anv_DestroyDescriptorPool(
@@ -455,6 +567,8 @@ void anv_DestroyDescriptorPool(
     if (!pool)
        return;

+   if (pool->inline_blocks_bo)
+      anv_bo_cache_release(device, &device->bo_cache, pool->inline_blocks_bo);
     anv_state_stream_finish(&pool->surface_state_stream);
     vk_free2(&device->alloc, pAllocator, pool);
  }
@@ -469,6 +583,8 @@ VkResult anv_ResetDescriptorPool(

     pool->next_set = 0;
     pool->free_set_list = EMPTY;
+   pool->next_block = 0;
+   pool->free_block_list = EMPTY;
     anv_state_stream_finish(&pool->surface_state_stream);
     anv_state_stream_init(&pool->surface_state_stream,
                           &device->surface_state_pool, 4096);
@@ -496,6 +612,36 @@ struct surface_state_free_list_entry {
     struct anv_state state;
  };

+static struct anv_descriptor_set *
+anv_descriptor_inline_block_alloc(struct anv_descriptor_pool *pool,
+                                  struct anv_descriptor_set_layout *layout,
+                                  struct anv_descriptor_set *set)
+{
+   if (layout->inline_blocks_size == 0) {
+      set->inline_blocks = NULL;
+      return set;
+   }
+
+   if (layout->inline_blocks_size <= pool->inline_blocks_size - pool->next_block) {
+      set->inline_blocks = pool->inline_blocks_bo->map + pool->next_block;
+      pool->next_block += layout->inline_blocks_size;
+      return set;
+   }
+
+   struct pool_free_list_entry *entry;
+   uint32_t *link = &pool->free_block_list;
+   for (uint32_t f = pool->free_block_list; f != EMPTY; f = entry->next) {
+      entry = (struct pool_free_list_entry *) (pool->inline_blocks_bo->map + f);
+      if (layout->inline_blocks_size <= entry->size) {
+         *link = entry->next;
+         set->inline_blocks = entry;
+         return set;
+      }
+   }
+
+   return NULL;
+}
+
  static struct anv_descriptor_set *
  anv_descriptor_alloc(struct anv_descriptor_pool *pool,
                       struct anv_descriptor_set_layout *layout,
@@ -504,8 +650,10 @@ anv_descriptor_alloc(struct anv_descriptor_pool *pool,
     struct anv_descriptor_set *set = NULL;

     if (size <= pool->size - pool->next_set) {
-      set = (struct anv_descriptor_set *) (pool->data + pool->next_set);
-      pool->next_set += size;
+      set = anv_descriptor_inline_block_alloc(pool, layout,
+         (struct anv_descriptor_set *) (pool->data + pool->next_set));
+      if (set)
+         pool->next_set += size;
        return set;
     }

@@ -515,8 +663,10 @@ anv_descriptor_alloc(struct anv_descriptor_pool *pool,
        entry = (struct pool_free_list_entry *) (pool->data + f);
        if (size <= entry->size) {
           uint32_t next = entry->next;
-         set = (struct anv_descriptor_set *) entry;
-         *link = next;
+         set = anv_descriptor_inline_block_alloc(pool, layout,
+            (struct anv_descriptor_set *) entry);
+         if (set)
+            *link = next;
           return set;
        }
        link = &entry->next;
@@ -573,6 +723,18 @@ anv_descriptor_set_create(struct anv_device *device,
        desc += layout->binding[b].array_size;
     }

+   /* Also fill the inline uniforms blocks if needed. */
+   if (layout->inline_blocks_descriptor_index >= 0) {
+      set->descriptors[layout->inline_blocks_descriptor_index] = (struct anv_descriptor) {
+         .type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT,
+         .inline_address = (struct anv_address) {
+            .bo = pool->inline_blocks_bo,
+            .offset = set->inline_blocks - pool->inline_blocks_bo->map,
+         },
+         .inline_range = layout->inline_blocks_size,
+      };
+   }
+
     /* Allocate surface state for the buffer views. */
     for (uint32_t b = 0; b < layout->buffer_count; b++) {
        struct surface_state_free_list_entry *entry =
@@ -600,8 +762,6 @@ anv_descriptor_set_destroy(struct anv_device *device,
                             struct anv_descriptor_pool *pool,
                             struct anv_descriptor_set *set)
  {
-   anv_descriptor_set_layout_unref(device, set->layout);
-
     /* Put the buffer view surface state back on the free list. */
     for (uint32_t b = 0; b < set->buffer_count; b++) {
        struct surface_state_free_list_entry *entry =
@@ -611,7 +771,22 @@ anv_descriptor_set_destroy(struct anv_device *device,
        pool->surface_state_free_list = entry;
     }

-   /* Put the descriptor set allocation back on the free list. */
+   /* Put the inline uniform blocks back on the free list. */
+   if (set->inline_blocks) {
+      const uint32_t index = set->inline_blocks - pool->inline_blocks_bo->map;
+      if (index + set->layout->inline_blocks_size == pool->next_block) {
+         pool->next_block = index;
+      } else {
+         struct pool_free_list_entry *entry = (struct pool_free_list_entry *) set->inline_blocks;
+         entry->next = pool->free_block_list;
+         entry->size = set->layout->inline_blocks_size;
+         pool->free_block_list = (char *) entry - pool->data;
+      }
+   }
+
+   anv_descriptor_set_layout_unref(device, set->layout);
+
+   /* Finally, put the descriptor set allocation back on the free list. */
     const uint32_t index = (char *) set - pool->data;
     if (index + set->size == pool->next_set) {
        pool->next_set = index;
@@ -798,6 +973,23 @@ anv_descriptor_set_write_buffer(struct anv_descriptor_set *set,
     }
  }

+static void
+anv_descriptor_set_write_inline_uniforms(struct anv_descriptor_set *set,
+                                         VkDescriptorType type,
+                                         uint32_t binding,
+                                         uint32_t dst_offset,
+                                         const void *data,
+                                         uint32_t data_length)
+{
+   const struct anv_descriptor_set_binding_layout *bind_layout =
+      &set->layout->binding[binding];
+
+   assert(type == bind_layout->type);
+
+   memcpy(set->inline_blocks + bind_layout->inline_block_offset + dst_offset,
+          data, data_length);
+}
+
  void anv_UpdateDescriptorSets(
      VkDevice                                    _device,
      uint32_t                                    descriptorWriteCount,
@@ -826,6 +1018,20 @@ void anv_UpdateDescriptorSets(
           }
           break;

+      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
+         const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub =
+            vk_find_struct_const(write->pNext,
+                                 WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT);
+         assert(inline_ub);
+         anv_descriptor_set_write_inline_uniforms(set,
+                                                  write->descriptorType,
+                                                  write->dstBinding,
+                                                  write->dstArrayElement,
+                                                  inline_ub->pData,
+                                                  inline_ub->dataSize);
+         break;
+      }
+
        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
           for (uint32_t j = 0; j < write->descriptorCount; j++) {
@@ -954,6 +1160,16 @@ anv_descriptor_set_write_template(struct anv_descriptor_set *set,
           }
           break;

+      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
+         anv_descriptor_set_write_inline_uniforms(set,
+                                                  entry->type,
+                                                  entry->binding,
+                                                  entry->array_element,
+                                                  data + entry->offset,
+                                                  entry->array_count);
Where in the spec did you find that the data is specified directly
instead of a VkWriteDescriptorSetInlineUniformBlockEXT struct?


It's not well stated, but what else could the data be about?

The description of the fields in VkDescriptorUpdateTemplateEntryKHR was the best hint for me.


FWIW this makes some of the dEQP-VK.binding_model.descriptorset_random* pass so I think vk-gl-cts utilizes it this way.

I agree it seems strange (at least from a user's point of view) that the usage is not similar to vkUpdateDescriptorSets, i.e. using VkWriteDescriptorSetInlineUniformBlockEXT as one would expect from the extension.






+         break;
+      }
+
        default:
           break;
        }
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 1e37876eb43..5bc9f7e7e88 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -942,6 +942,15 @@ void anv_GetPhysicalDeviceFeatures2(
           break;
        }

+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
+         VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
+            (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
+
+         features->inlineUniformBlock = true;
+         features->descriptorBindingInlineUniformBlockUpdateAfterBind = false;
+         break;
+      }
+
        default:
           anv_debug_ignored_stype(ext->sType);
           break;
@@ -1198,6 +1207,19 @@ void anv_GetPhysicalDeviceProperties2(
           break;
        }

+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
+         VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
+            (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
+
+         /* All required minimum values. */
+         props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
+         props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
+         props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 4;
+         props->maxDescriptorSetInlineUniformBlocks = 4;
+         props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 4;
+         break;
+      }
+
        default:
           anv_debug_ignored_stype(ext->sType);
           break;
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
index 951505a854e..61803c9d7fa 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -125,6 +125,7 @@ EXTENSIONS = [
      Extension('VK_EXT_vertex_attribute_divisor',          3, True),
     Extension('VK_EXT_post_depth_coverage',               1, 'device->info.gen >= 9'),
     Extension('VK_EXT_sampler_filter_minmax',             1, 'device->info.gen >= 9'),
+    Extension('VK_EXT_inline_uniform_block',              1, True),
  ]

  class VkVersion:
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 856101cc2ff..75bf33806f9 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -160,6 +160,22 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
       block_index = nir_iadd(b, nir_imm_int(b, surface_index), block_index);
     }

+   /* We use a single binding table entry for all inline uniform blocks. That
+    * means each block is at a different offset in that entry. We can get this
+    * offset from the layout using (set, binding) but we need to add that
+    * offset into the actual load_ubo intrinsic.
+    *
+    * Here instead of just setting the block index we set a tuple
+    * (block_index, inline_block_offset) which will be replaced when we run into a
+    * load_ubo intrinsic (see lower_inline_uniform_block function).
+    */
+   uint32_t inline_block_offset =
+      state->layout->set[set].layout->binding[binding].inline_block_offset;
+   if (inline_block_offset != -1) {
+      block_index = nir_vec2(b, block_index,
+                             nir_imm_int(b, inline_block_offset));
+   }
+
     assert(intrin->dest.is_ssa);
     nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index));
     nir_instr_remove(&intrin->instr);
@@ -268,6 +284,39 @@ lower_load_constant(nir_intrinsic_instr *intrin,
     nir_instr_remove(&intrin->instr);
  }

+static void
+lower_inline_uniform_block(nir_intrinsic_instr *intrin,
+                           struct apply_pipeline_layout_state *state)
+{
+   if (!intrin->src[0].ssa->parent_instr ||
+       intrin->src[0].ssa->parent_instr->type != nir_instr_type_alu)
+      return;
+
+   nir_alu_instr *alu = nir_instr_as_alu(intrin->src[0].ssa->parent_instr);
+   if (alu->op != nir_op_vec2)
+      return;
+
+   nir_builder *b = &state->builder;
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], alu->src[0].src);
+
+   nir_const_value *const_inline_block_offset =
+      nir_src_as_const_value(alu->src[1].src);
+   assert(const_inline_block_offset);
+
+   nir_const_value *const_offset = nir_src_as_const_value(intrin->src[1]);
+   nir_ssa_def *offset;
+   if (const_offset) {
+      offset = nir_iadd(b, nir_imm_int(b, const_offset->u32[0]),
+                        nir_imm_int(b, const_inline_block_offset->u32[0]));
+   } else {
+      offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[1], 1),
+                        nir_imm_int(b, const_inline_block_offset->u32[0]));
+   }
+   nir_instr_rewrite_src(&intrin->instr, &intrin->src[1], nir_src_for_ssa(offset));
+}
+
  static void
  lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
                  unsigned *base_index,
@@ -401,6 +450,9 @@ apply_pipeline_layout_block(nir_block *block,
           case nir_intrinsic_load_constant:
              lower_load_constant(intrin, state);
              break;
+         case nir_intrinsic_load_ubo:
+            lower_inline_uniform_block(intrin, state);
+            break;
           default:
              break;
           }
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 372b7c69635..cea8e5786f5 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -159,6 +159,8 @@ struct gen_l3_config;
  #define MAX_DYNAMIC_BUFFERS 16
  #define MAX_IMAGES 8
  #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
+#define MAX_INLINE_UNIFORM_BLOCK_SIZE 256 /* Minimum requirement */
+#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 4 /* Minimum requirement */

  /* The kernel relocation API has a limitation of a 32-bit delta value
   * applied to the address before it is written which, in spite of it being
@@ -1394,6 +1396,16 @@ struct anv_descriptor_set_binding_layout {
     /* Index into the descriptor set buffer views */
     int16_t buffer_index;

+   /* TODO/question: should we union fields a bit? inline uniform blocks have
+    * no use for array_size, buffer_index & dynamic_offset_index.
+    */
+
+   /* Offset into the portion of data allocated for the inline uniforms. */
+   uint32_t inline_block_offset;
+
+   /* Length of the portion of data allocated for inline uniforms */
+   uint32_t inline_block_length;
+
     struct {
        /* Index into the binding table for the associated surface */
        int16_t surface_index;
@@ -1428,6 +1440,15 @@ struct anv_descriptor_set_layout {
     /* Number of dynamic offsets used by this descriptor set */
     uint16_t dynamic_offset_count;

+   /* Index into the flattend descriptor set (-1 if unused). */
+   int16_t inline_blocks_descriptor_index;
+
+   /* Data to allocate into the pool descriptor's inline uniforms BO */
+   uint32_t inline_blocks_size;
+
+   /* Index into the binding table for the associated surface */
+   int16_t inline_blocks_surface_indexes[MESA_SHADER_STAGES];
+
     /* Bindings in this descriptor set */
     struct anv_descriptor_set_binding_layout binding[0];
  };
@@ -1464,6 +1485,11 @@ struct anv_descriptor {
           uint64_t range;
        };

+      struct {
+         struct anv_address inline_address;
+         uint64_t inline_range;
+      };
+
        struct anv_buffer_view *buffer_view;
     };
  };
@@ -1472,6 +1498,7 @@ struct anv_descriptor_set {
     struct anv_descriptor_set_layout *layout;
     uint32_t size;
     uint32_t buffer_count;
+   void *inline_blocks;
     struct anv_buffer_view *buffer_views;
     struct anv_descriptor descriptors[0];
  };
@@ -1507,6 +1534,12 @@ struct anv_descriptor_pool {
     struct anv_state_stream surface_state_stream;
     void *surface_state_free_list;

+   struct anv_bo *inline_blocks_bo;
+   uint32_t inline_blocks_size;
+
+   uint32_t free_block_list;
+   uint32_t next_block;
+
     char data[0];
  };

diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 80bebf5a12c..5cb4c0f13af 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2146,6 +2146,19 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                             desc->buffer_view->address);
           break;

+      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
+         surface_state =
+            anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
+         enum isl_format format =
+            anv_isl_format_for_descriptor_type(desc->type);
+
+         anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
+                                       format, desc->inline_address,
+                                       desc->inline_range, 1);
+         add_surface_reloc(cmd_buffer, surface_state, desc->inline_address);
+         break;
+      }
+
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
           /* Compute the offset within the buffer */
@@ -2445,14 +2458,15 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
                    const struct anv_descriptor *desc =
                       anv_descriptor_for_binding(&gfx_state->base, binding);

-                  if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+                  switch (desc->type) {
+                  case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
                       read_len = MIN2(range->length,
                        DIV_ROUND_UP(desc->buffer_view->range, 32) - range->start);
                      read_addr = anv_address_add(desc->buffer_view->address,
                                                   range->start * 32);
-                  } else {
-                     assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
+                     break;

+                  case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
                       uint32_t dynamic_offset =
                         dynamic_offset_for_binding(&gfx_state->base, binding);
                       uint32_t buf_offset =
@@ -2464,6 +2478,18 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
                          DIV_ROUND_UP(buf_range, 32) - range->start);
                      read_addr = anv_address_add(desc->buffer->address,
                                                  buf_offset + range->start * 32);
+                     break;
+                  }
+
+                  case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+                     read_len = MIN2(range->length,
+                        DIV_ROUND_UP(desc->inline_range, 32) - range->start);
+                     read_addr = anv_address_add(desc->inline_address,
+                                                 range->start * 32);
+                     break;
+
+                  default:
+                     unreachable("Invalid descriptor");
                    }
                 }

--
2.19.0.rc1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to