I can't say I know enough about all these parts, but I went through the API functions and tried to check that you have the proper checks in place. I will still try to review it :)

I did not see any check against MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS when creating the pipeline layout. I'm not sure whether one is necessary (since it's an implicit rule); do you think there should be a check/assert?

One minor possible addition below:

On 11.09.2018 23:22, Lionel Landwerlin wrote:
This new extension adds an implicitly allocated block of uniforms into
the descriptor sets through a new descriptor type. We implement this by
having a single BO in the descriptor set pool
from which we source uniforms.

Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
---
  src/intel/vulkan/anv_cmd_buffer.c             |   3 +
  src/intel/vulkan/anv_descriptor_set.c         | 238 +++++++++++++++++-
  src/intel/vulkan/anv_device.c                 |  22 ++
  src/intel/vulkan/anv_extensions.py            |   1 +
  .../vulkan/anv_nir_apply_pipeline_layout.c    |  52 ++++
  src/intel/vulkan/anv_private.h                |  33 +++
  src/intel/vulkan/genX_cmd_buffer.c            |  32 ++-
  7 files changed, 367 insertions(+), 14 deletions(-)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c
index 8ef71b0ed9c..b14be94f470 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -651,6 +651,7 @@ anv_isl_format_for_descriptor_type(VkDescriptorType type)
     switch (type) {
     case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
     case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
        return ISL_FORMAT_R32G32B32A32_FLOAT;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
@@ -1039,6 +1040,8 @@ void anv_CmdPushDescriptorSetKHR(
           }
           break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+         unreachable("Invalid descriptor type for push descriptors");
        default:
           break;
        }
diff --git a/src/intel/vulkan/anv_descriptor_set.c 
b/src/intel/vulkan/anv_descriptor_set.c
index 3439f828900..2e5f2a1f288 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -26,8 +26,10 @@
  #include <string.h>
  #include <unistd.h>
  #include <fcntl.h>
+#include <sys/mman.h>
#include "util/mesa-sha1.h"
+#include "vk_util.h"
#include "anv_private.h" @@ -40,7 +42,8 @@ void anv_GetDescriptorSetLayoutSupport(
      const VkDescriptorSetLayoutCreateInfo*      pCreateInfo,
      VkDescriptorSetLayoutSupport*               pSupport)
  {
-   uint32_t surface_count[MESA_SHADER_STAGES] = { 0, };
+   int16_t surface_count[MESA_SHADER_STAGES] = { 0, };
+   int16_t inline_surface_indexes[MESA_SHADER_STAGES] = { -1, };
for (uint32_t b = 0; b < pCreateInfo->bindingCount; b++) {
        const VkDescriptorSetLayoutBinding *binding = 
&pCreateInfo->pBindings[b];
@@ -50,6 +53,15 @@ void anv_GetDescriptorSetLayoutSupport(
           /* There is no real limit on samplers */
           break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+         anv_foreach_stage(s, binding->stageFlags) {
+            if (inline_surface_indexes[s] < 0) {
+               inline_surface_indexes[s] = surface_count[s];
+               surface_count[s] += 1;
+            }
+         }
+         break;
+
        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
           if (binding->pImmutableSamplers) {
              for (uint32_t i = 0; i < binding->descriptorCount; i++) {
@@ -118,6 +130,9 @@ VkResult anv_CreateDescriptorSetLayout(
     memset(set_layout, 0, sizeof(*set_layout));
     set_layout->ref_cnt = 1;
     set_layout->binding_count = max_binding + 1;
+   set_layout->inline_blocks_descriptor_index = -1;
+   memset(set_layout->inline_blocks_surface_indexes,
+          -1, sizeof(set_layout->inline_blocks_surface_indexes));
for (uint32_t b = 0; b <= max_binding; b++) {
        /* Initialize all binding_layout entries to -1 */
@@ -159,9 +174,24 @@ VkResult anv_CreateDescriptorSetLayout(
  #ifndef NDEBUG
        set_layout->binding[b].type = binding->descriptorType;
  #endif
-      set_layout->binding[b].array_size = binding->descriptorCount;
-      set_layout->binding[b].descriptor_index = set_layout->size;
-      set_layout->size += binding->descriptorCount;
+
+      if (binding->descriptorType == 
VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {

Maybe add here

assert(binding->descriptorCount % 4 == 0 &&
       binding->descriptorCount <= MAX_INLINE_UNIFORM_BLOCK_SIZE);

?

+         /* We only need a single descriptor entry for all the inline uniforms. */
+         set_layout->binding[b].array_size = 1;
+         if (set_layout->inline_blocks_descriptor_index < 0) {
+            set_layout->binding[b].descriptor_index =
+               set_layout->inline_blocks_descriptor_index =
+               set_layout->size;
+            set_layout->size += 1;
+         } else {
+            set_layout->binding[b].descriptor_index =
+               set_layout->inline_blocks_descriptor_index;
+         }
+      } else {
+         set_layout->binding[b].array_size = binding->descriptorCount;
+         set_layout->binding[b].descriptor_index = set_layout->size;
+         set_layout->size += binding->descriptorCount;
+      }
switch (binding->descriptorType) {
        case VK_DESCRIPTOR_TYPE_SAMPLER:
@@ -176,6 +206,24 @@ VkResult anv_CreateDescriptorSetLayout(
        }
switch (binding->descriptorType) {
+      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+         set_layout->binding[b].inline_block_offset = 
set_layout->inline_blocks_size;
+         set_layout->binding[b].inline_block_length = binding->descriptorCount;
+         set_layout->inline_blocks_size += binding->descriptorCount;
+
+         anv_foreach_stage(s, binding->stageFlags) {
+            if (set_layout->inline_blocks_surface_indexes[s] < 0) {
+               set_layout->binding[b].stage[s].surface_index =
+                  set_layout->inline_blocks_surface_indexes[s] =
+                  surface_count[s];
+               surface_count[s] += 1;
+            } else {
+               set_layout->binding[b].stage[s].surface_index =
+                  set_layout->inline_blocks_surface_indexes[s];
+            }
+         }
+         break;
+
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -235,6 +283,12 @@ VkResult anv_CreateDescriptorSetLayout(
        set_layout->shader_stages |= binding->stageFlags;
     }
+ /* Align inline uniforms total size to 32 because we source the allocation
+    * from a single BO in the descriptor set pool and we want the alignment to
+    * match the push constant alignment constraint.
+    */
+   set_layout->inline_blocks_size = ALIGN(set_layout->inline_blocks_size, 32);
+
     set_layout->buffer_count = buffer_count;
     set_layout->dynamic_offset_count = dynamic_offset_count;
@@ -405,21 +459,45 @@ VkResult anv_CreateDescriptorPool(
     ANV_FROM_HANDLE(anv_device, device, _device);
     struct anv_descriptor_pool *pool;
+ vk_foreach_struct(ext, pCreateInfo->pNext) {
+      switch (ext->sType) {
+      case 
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT:
+         /* Our driver doesn't need to know about this as we use a single
+          * binding table entry per stage if one or more inline descriptor
+          * blocks are used.
+          */
+         break;
+
+      default:
+         anv_debug_ignored_stype(ext->sType);
+         break;
+      }
+   }
+
     uint32_t descriptor_count = 0;
     uint32_t buffer_count = 0;
+   uint32_t inline_blocks_size = 0;
     for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) {
        switch (pCreateInfo->pPoolSizes[i].type) {
+      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+         inline_blocks_size += pCreateInfo->pPoolSizes[i].descriptorCount;
+         break;
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
           buffer_count += pCreateInfo->pPoolSizes[i].descriptorCount;
+         /* Fallthrough */
        default:
           descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount;
           break;
        }
     }
+ /* We'll need one more descriptor for inline uniforms. */
+   if (inline_blocks_size > 0)
+      descriptor_count += MIN2(pCreateInfo->maxSets, inline_blocks_size / 4);
+
     const size_t pool_size =
        pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
        descriptor_count * sizeof(struct anv_descriptor) +
@@ -435,6 +513,34 @@ VkResult anv_CreateDescriptorPool(
     pool->next_set = 0;
     pool->free_set_list = EMPTY;
+ pool->inline_blocks_size = inline_blocks_size;
+   pool->next_block = 0;
+   pool->free_block_list = EMPTY;
+
+   pool->inline_blocks_bo = NULL;
+   if (pool->inline_blocks_size > 0) {
+      struct anv_physical_device *pdevice = &device->instance->physicalDevice;
+      uint64_t bo_flags =
+         (pdevice->supports_48bit_addresses ? EXEC_OBJECT_SUPPORTS_48B_ADDRESS 
: 0) |
+         (pdevice->has_exec_async ? EXEC_OBJECT_ASYNC : 0) |
+         (pdevice->use_softpin ? EXEC_OBJECT_PINNED : 0);
+
+      VkResult result = anv_bo_cache_alloc(device, &device->bo_cache,
+                                           pool->inline_blocks_size,
+                                           bo_flags,
+                                           &pool->inline_blocks_bo);
+      if (result != VK_SUCCESS)
+         goto fail_iubo_alloc;
+
+      uint32_t gem_flags = !device->info.has_llc ? I915_MMAP_WC : 0;
+      void *map = anv_gem_mmap(device, pool->inline_blocks_bo->gem_handle,
+                               0, pool->inline_blocks_bo->size, gem_flags);
+      if (map == MAP_FAILED)
+         goto fail_iubo_mmap;
+
+      pool->inline_blocks_bo->map = map;
+   }
+
     anv_state_stream_init(&pool->surface_state_stream,
                           &device->surface_state_pool, 4096);
     pool->surface_state_free_list = NULL;
@@ -442,6 +548,12 @@ VkResult anv_CreateDescriptorPool(
     *pDescriptorPool = anv_descriptor_pool_to_handle(pool);
return VK_SUCCESS;
+
+fail_iubo_mmap:
+   anv_bo_cache_release(device, &device->bo_cache, pool->inline_blocks_bo);
+fail_iubo_alloc:
+   vk_free2(&device->alloc, pAllocator, pool);
+   return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  }
void anv_DestroyDescriptorPool(
@@ -455,6 +567,8 @@ void anv_DestroyDescriptorPool(
     if (!pool)
        return;
+ if (pool->inline_blocks_bo)
+      anv_bo_cache_release(device, &device->bo_cache, pool->inline_blocks_bo);
     anv_state_stream_finish(&pool->surface_state_stream);
     vk_free2(&device->alloc, pAllocator, pool);
  }
@@ -469,6 +583,8 @@ VkResult anv_ResetDescriptorPool(
pool->next_set = 0;
     pool->free_set_list = EMPTY;
+   pool->next_block = 0;
+   pool->free_block_list = EMPTY;
     anv_state_stream_finish(&pool->surface_state_stream);
     anv_state_stream_init(&pool->surface_state_stream,
                           &device->surface_state_pool, 4096);
@@ -496,6 +612,36 @@ struct surface_state_free_list_entry {
     struct anv_state state;
  };
+static struct anv_descriptor_set *
+anv_descriptor_inline_block_alloc(struct anv_descriptor_pool *pool,
+                                  struct anv_descriptor_set_layout *layout,
+                                  struct anv_descriptor_set *set)
+{
+   if (layout->inline_blocks_size == 0) {
+      set->inline_blocks = NULL;
+      return set;
+   }
+
+   if (layout->inline_blocks_size <= pool->inline_blocks_size - 
pool->next_block) {
+      set->inline_blocks = pool->inline_blocks_bo->map + pool->next_block;
+      pool->next_block += layout->inline_blocks_size;
+      return set;
+   }
+
+   struct pool_free_list_entry *entry;
+   uint32_t *link = &pool->free_block_list;
+   for (uint32_t f = pool->free_block_list; f != EMPTY; f = entry->next) {
+      entry = (struct pool_free_list_entry *) (pool->inline_blocks_bo->map + 
f);
+      if (layout->inline_blocks_size <= entry->size) {
+         *link = entry->next;
+         set->inline_blocks = entry;
+         return set;
+      }
+   }
+
+   return NULL;
+}
+
  static struct anv_descriptor_set *
  anv_descriptor_alloc(struct anv_descriptor_pool *pool,
                       struct anv_descriptor_set_layout *layout,
@@ -504,8 +650,10 @@ anv_descriptor_alloc(struct anv_descriptor_pool *pool,
     struct anv_descriptor_set *set = NULL;
if (size <= pool->size - pool->next_set) {
-      set = (struct anv_descriptor_set *) (pool->data + pool->next_set);
-      pool->next_set += size;
+      set = anv_descriptor_inline_block_alloc(pool, layout,
+         (struct anv_descriptor_set *) (pool->data + pool->next_set));
+      if (set)
+         pool->next_set += size;
        return set;
     }
@@ -515,8 +663,10 @@ anv_descriptor_alloc(struct anv_descriptor_pool *pool,
        entry = (struct pool_free_list_entry *) (pool->data + f);
        if (size <= entry->size) {
           uint32_t next = entry->next;
-         set = (struct anv_descriptor_set *) entry;
-         *link = next;
+         set = anv_descriptor_inline_block_alloc(pool, layout,
+            (struct anv_descriptor_set *) entry);
+         if (set)
+            *link = next;
           return set;
        }
        link = &entry->next;
@@ -573,6 +723,18 @@ anv_descriptor_set_create(struct anv_device *device,
        desc += layout->binding[b].array_size;
     }
+ /* Also fill the inline uniforms blocks if needed. */
+   if (layout->inline_blocks_descriptor_index >= 0) {
+      set->descriptors[layout->inline_blocks_descriptor_index] = (struct 
anv_descriptor) {
+         .type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT,
+         .inline_address = (struct anv_address) {
+            .bo = pool->inline_blocks_bo,
+            .offset = set->inline_blocks - pool->inline_blocks_bo->map,
+         },
+         .inline_range = layout->inline_blocks_size,
+      };
+   }
+
     /* Allocate surface state for the buffer views. */
     for (uint32_t b = 0; b < layout->buffer_count; b++) {
        struct surface_state_free_list_entry *entry =
@@ -600,8 +762,6 @@ anv_descriptor_set_destroy(struct anv_device *device,
                             struct anv_descriptor_pool *pool,
                             struct anv_descriptor_set *set)
  {
-   anv_descriptor_set_layout_unref(device, set->layout);
-
     /* Put the buffer view surface state back on the free list. */
     for (uint32_t b = 0; b < set->buffer_count; b++) {
        struct surface_state_free_list_entry *entry =
@@ -611,7 +771,22 @@ anv_descriptor_set_destroy(struct anv_device *device,
        pool->surface_state_free_list = entry;
     }
- /* Put the descriptor set allocation back on the free list. */
+   /* Put the inline uniform blocks back on the free list. */
+   if (set->inline_blocks) {
+      const uint32_t index = set->inline_blocks - pool->inline_blocks_bo->map;
+      if (index + set->layout->inline_blocks_size == pool->next_block) {
+         pool->next_block = index;
+      } else {
+         struct pool_free_list_entry *entry = (struct pool_free_list_entry *) 
set->inline_blocks;
+         entry->next = pool->free_block_list;
+         entry->size = set->layout->inline_blocks_size;
+         pool->free_block_list = (char *) entry - pool->data;
+      }
+   }
+
+   anv_descriptor_set_layout_unref(device, set->layout);
+
+   /* Finally, put the descriptor set allocation back on the free list. */
     const uint32_t index = (char *) set - pool->data;
     if (index + set->size == pool->next_set) {
        pool->next_set = index;
@@ -798,6 +973,23 @@ anv_descriptor_set_write_buffer(struct anv_descriptor_set 
*set,
     }
  }
+static void
+anv_descriptor_set_write_inline_uniforms(struct anv_descriptor_set *set,
+                                         VkDescriptorType type,
+                                         uint32_t binding,
+                                         uint32_t dst_offset,
+                                         const void *data,
+                                         uint32_t data_length)
+{
+   const struct anv_descriptor_set_binding_layout *bind_layout =
+      &set->layout->binding[binding];
+
+   assert(type == bind_layout->type);
+
+   memcpy(set->inline_blocks + bind_layout->inline_block_offset + dst_offset,
+          data, data_length);
+}
+
  void anv_UpdateDescriptorSets(
      VkDevice                                    _device,
      uint32_t                                    descriptorWriteCount,
@@ -826,6 +1018,20 @@ void anv_UpdateDescriptorSets(
           }
           break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
+         const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub =
+            vk_find_struct_const(write->pNext,
+                                 
WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT);
+         assert(inline_ub);
+         anv_descriptor_set_write_inline_uniforms(set,
+                                                  write->descriptorType,
+                                                  write->dstBinding,
+                                                  write->dstArrayElement,
+                                                  inline_ub->pData,
+                                                  inline_ub->dataSize);
+         break;
+      }
+
        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
           for (uint32_t j = 0; j < write->descriptorCount; j++) {
@@ -954,6 +1160,16 @@ anv_descriptor_set_write_template(struct 
anv_descriptor_set *set,
           }
           break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
+         anv_descriptor_set_write_inline_uniforms(set,
+                                                  entry->type,
+                                                  entry->binding,
+                                                  entry->array_element,
+                                                  data + entry->offset,
+                                                  entry->array_count);
+         break;
+      }
+
        default:
           break;
        }
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 1e37876eb43..5bc9f7e7e88 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -942,6 +942,15 @@ void anv_GetPhysicalDeviceFeatures2(
           break;
        }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
+         VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
+            (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
+
+         features->inlineUniformBlock = true;
+         features->descriptorBindingInlineUniformBlockUpdateAfterBind = false;
+         break;
+      }
+
        default:
           anv_debug_ignored_stype(ext->sType);
           break;
@@ -1198,6 +1207,19 @@ void anv_GetPhysicalDeviceProperties2(
           break;
        }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
+         VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
+            (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
+
+         /* All required minimum values. */
+         props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
+         props->maxPerStageDescriptorInlineUniformBlocks = 
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
+         props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 4;
+         props->maxDescriptorSetInlineUniformBlocks = 4;
+         props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 4;
+         break;
+      }
+
        default:
           anv_debug_ignored_stype(ext->sType);
           break;
diff --git a/src/intel/vulkan/anv_extensions.py 
b/src/intel/vulkan/anv_extensions.py
index 951505a854e..61803c9d7fa 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -125,6 +125,7 @@ EXTENSIONS = [
      Extension('VK_EXT_vertex_attribute_divisor',          3, True),
      Extension('VK_EXT_post_depth_coverage',               1, 'device->info.gen 
>= 9'),
      Extension('VK_EXT_sampler_filter_minmax',             1, 'device->info.gen 
>= 9'),
+    Extension('VK_EXT_inline_uniform_block',              1, True),
  ]
class VkVersion:
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 
b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 856101cc2ff..75bf33806f9 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -160,6 +160,22 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
        block_index = nir_iadd(b, nir_imm_int(b, surface_index), block_index);
     }
+ /* We use a single binding table entry for all inline uniform blocks. That
+    * means each block is at a different offset in that entry. We can get this
+    * offset from the layout using (set, binding) but we need to add that
+    * offset into the actual load_ubo intrinsic.
+    *
+    * Here instead of just setting the block index we set a tuple
+    * (block_index, inline_block_offset) which will be replaced when we run into a
+    * load_ubo intrinsic (see lower_inline_uniform_block function).
+    */
+   uint32_t inline_block_offset =
+      state->layout->set[set].layout->binding[binding].inline_block_offset;
+   if (inline_block_offset != -1) {
+      block_index = nir_vec2(b, block_index,
+                             nir_imm_int(b, inline_block_offset));
+   }
+
     assert(intrin->dest.is_ssa);
     nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index));
     nir_instr_remove(&intrin->instr);
@@ -268,6 +284,39 @@ lower_load_constant(nir_intrinsic_instr *intrin,
     nir_instr_remove(&intrin->instr);
  }
+static void
+lower_inline_uniform_block(nir_intrinsic_instr *intrin,
+                           struct apply_pipeline_layout_state *state)
+{
+   if (!intrin->src[0].ssa->parent_instr ||
+       intrin->src[0].ssa->parent_instr->type != nir_instr_type_alu)
+      return;
+
+   nir_alu_instr *alu = nir_instr_as_alu(intrin->src[0].ssa->parent_instr);
+   if (alu->op != nir_op_vec2)
+      return;
+
+   nir_builder *b = &state->builder;
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], alu->src[0].src);
+
+   nir_const_value *const_inline_block_offset =
+      nir_src_as_const_value(alu->src[1].src);
+   assert(const_inline_block_offset);
+
+   nir_const_value *const_offset = nir_src_as_const_value(intrin->src[1]);
+   nir_ssa_def *offset;
+   if (const_offset) {
+      offset = nir_iadd(b, nir_imm_int(b, const_offset->u32[0]),
+                        nir_imm_int(b, const_inline_block_offset->u32[0]));
+   } else {
+      offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[1], 1),
+                        nir_imm_int(b, const_inline_block_offset->u32[0]));
+   }
+   nir_instr_rewrite_src(&intrin->instr, &intrin->src[1], 
nir_src_for_ssa(offset));
+}
+
  static void
  lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
                  unsigned *base_index,
@@ -401,6 +450,9 @@ apply_pipeline_layout_block(nir_block *block,
           case nir_intrinsic_load_constant:
              lower_load_constant(intrin, state);
              break;
+         case nir_intrinsic_load_ubo:
+            lower_inline_uniform_block(intrin, state);
+            break;
           default:
              break;
           }
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 372b7c69635..cea8e5786f5 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -159,6 +159,8 @@ struct gen_l3_config;
  #define MAX_DYNAMIC_BUFFERS 16
  #define MAX_IMAGES 8
  #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
+#define MAX_INLINE_UNIFORM_BLOCK_SIZE 256 /* Minimum requirement */
+#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 4 /* Minimum requirement */
/* The kernel relocation API has a limitation of a 32-bit delta value
   * applied to the address before it is written which, in spite of it being
@@ -1394,6 +1396,16 @@ struct anv_descriptor_set_binding_layout {
     /* Index into the descriptor set buffer views */
     int16_t buffer_index;
+ /* TODO/question: should we union fields a bit? inline uniform blocks have
+    * no use for array_size, buffer_index & dynamic_offset_index.
+    */
+
+   /* Offset into the portion of data allocated for the inline uniforms. */
+   uint32_t inline_block_offset;
+
+   /* Length of the portion of data allocated for inline uniforms */
+   uint32_t inline_block_length;
+
     struct {
        /* Index into the binding table for the associated surface */
        int16_t surface_index;
@@ -1428,6 +1440,15 @@ struct anv_descriptor_set_layout {
     /* Number of dynamic offsets used by this descriptor set */
     uint16_t dynamic_offset_count;
+ /* Index into the flattened descriptor set (-1 if unused). */
+   int16_t inline_blocks_descriptor_index;
+
+   /* Data to allocate into the pool descriptor's inline uniforms BO */
+   uint32_t inline_blocks_size;
+
+   /* Index into the binding table for the associated surface */
+   int16_t inline_blocks_surface_indexes[MESA_SHADER_STAGES];
+
     /* Bindings in this descriptor set */
     struct anv_descriptor_set_binding_layout binding[0];
  };
@@ -1464,6 +1485,11 @@ struct anv_descriptor {
           uint64_t range;
        };
+ struct {
+         struct anv_address inline_address;
+         uint64_t inline_range;
+      };
+
        struct anv_buffer_view *buffer_view;
     };
  };
@@ -1472,6 +1498,7 @@ struct anv_descriptor_set {
     struct anv_descriptor_set_layout *layout;
     uint32_t size;
     uint32_t buffer_count;
+   void *inline_blocks;
     struct anv_buffer_view *buffer_views;
     struct anv_descriptor descriptors[0];
  };
@@ -1507,6 +1534,12 @@ struct anv_descriptor_pool {
     struct anv_state_stream surface_state_stream;
     void *surface_state_free_list;
+ struct anv_bo *inline_blocks_bo;
+   uint32_t inline_blocks_size;
+
+   uint32_t free_block_list;
+   uint32_t next_block;
+
     char data[0];
  };
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 80bebf5a12c..5cb4c0f13af 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2146,6 +2146,19 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                             desc->buffer_view->address);
           break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
+         surface_state =
+            anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
+         enum isl_format format =
+            anv_isl_format_for_descriptor_type(desc->type);
+
+         anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
+                                       format, desc->inline_address,
+                                       desc->inline_range, 1);
+         add_surface_reloc(cmd_buffer, surface_state, desc->inline_address);
+         break;
+      }
+
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
           /* Compute the offset within the buffer */
@@ -2445,14 +2458,15 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer 
*cmd_buffer,
                    const struct anv_descriptor *desc =
                       anv_descriptor_for_binding(&gfx_state->base, binding);
- if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+                  switch (desc->type) {
+                  case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
                       read_len = MIN2(range->length,
                          DIV_ROUND_UP(desc->buffer_view->range, 32) - 
range->start);
                       read_addr = anv_address_add(desc->buffer_view->address,
                                                   range->start * 32);
-                  } else {
-                     assert(desc->type == 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
+                     break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
                       uint32_t dynamic_offset =
                          dynamic_offset_for_binding(&gfx_state->base, binding);
                       uint32_t buf_offset =
@@ -2464,6 +2478,18 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer 
*cmd_buffer,
                          DIV_ROUND_UP(buf_range, 32) - range->start);
                       read_addr = anv_address_add(desc->buffer->address,
                                                   buf_offset + range->start * 
32);
+                     break;
+                  }
+
+                  case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+                     read_len = MIN2(range->length,
+                        DIV_ROUND_UP(desc->inline_range, 32) - range->start);
+                     read_addr = anv_address_add(desc->inline_address,
+                                                 range->start * 32);
+                     break;
+
+                  default:
+                     unreachable("Invalid descriptor");
                    }
                 }
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to