The data of an inline uniform block is stored directly in the descriptor
set, so supporting it mostly comes down to reserving that memory there.

On the shader side we construct a buffer descriptor that points at the
block's data inside the descriptor set, since, as far as I understand,
VGPR indexing on 32-bit pointers in LLVM is still broken.

This fully supports update-after-bind and variable descriptor counts
(i.e. variable-sized inline uniform blocks). However, the limits are
somewhat arbitrary; they are mostly about finding a reasonable way to
divide the 2 GiB maximum descriptor set memory size among the different
descriptor types in a set.
---
 src/amd/vulkan/radv_descriptor_set.c | 83 ++++++++++++++++++++++++----
 src/amd/vulkan/radv_device.c         | 22 +++++++-
 src/amd/vulkan/radv_extensions.py    |  1 +
 src/amd/vulkan/radv_nir_to_llvm.c    | 31 +++++++++--
 src/amd/vulkan/radv_private.h        |  2 +
 5 files changed, 122 insertions(+), 17 deletions(-)

diff --git a/src/amd/vulkan/radv_descriptor_set.c 
b/src/amd/vulkan/radv_descriptor_set.c
index c4341f6ac52..bbdc866852a 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -125,6 +125,7 @@ VkResult radv_CreateDescriptorSetLayout(
                uint32_t b = binding->binding;
                uint32_t alignment;
                unsigned binding_buffer_count = 0;
+               uint32_t descriptor_count = binding->descriptorCount;
 
                switch (binding->descriptorType) {
                case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -162,6 +163,11 @@ VkResult radv_CreateDescriptorSetLayout(
                        set_layout->binding[b].size = 16;
                        alignment = 16;
                        break;
+               case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+                       alignment = 16;
+                       set_layout->binding[b].size = descriptor_count;
+                       descriptor_count = 1;
+                       break;
                default:
                        unreachable("unknown descriptor type\n");
                        break;
@@ -169,7 +175,7 @@ VkResult radv_CreateDescriptorSetLayout(
 
                set_layout->size = align(set_layout->size, alignment);
                set_layout->binding[b].type = binding->descriptorType;
-               set_layout->binding[b].array_size = binding->descriptorCount;
+               set_layout->binding[b].array_size = descriptor_count;
                set_layout->binding[b].offset = set_layout->size;
                set_layout->binding[b].buffer_offset = buffer_count;
                set_layout->binding[b].dynamic_offset_offset = 
dynamic_offset_count;
@@ -203,9 +209,9 @@ VkResult radv_CreateDescriptorSetLayout(
                        samplers_offset += 4 * sizeof(uint32_t) * 
binding->descriptorCount;
                }
 
-               set_layout->size += binding->descriptorCount * 
set_layout->binding[b].size;
-               buffer_count += binding->descriptorCount * binding_buffer_count;
-               dynamic_offset_count += binding->descriptorCount *
+               set_layout->size += descriptor_count * 
set_layout->binding[b].size;
+               buffer_count += descriptor_count * binding_buffer_count;
+               dynamic_offset_count += descriptor_count *
                        set_layout->binding[b].dynamic_offset_count;
                set_layout->shader_stages |= binding->stageFlags;
        }
@@ -260,6 +266,7 @@ void radv_GetDescriptorSetLayoutSupport(VkDevice device,
 
                uint64_t descriptor_size = 0;
                uint64_t descriptor_alignment = 1;
+               uint32_t descriptor_count = binding->descriptorCount;
                switch (binding->descriptorType) {
                case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
                case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
@@ -278,7 +285,7 @@ void radv_GetDescriptorSetLayoutSupport(VkDevice device,
                        descriptor_alignment = 32;
                        break;
                case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-                       if 
(!has_equal_immutable_samplers(binding->pImmutableSamplers, 
binding->descriptorCount)) {
+                       if 
(!has_equal_immutable_samplers(binding->pImmutableSamplers, descriptor_count)) {
                                descriptor_size = 64;
                        } else {
                                descriptor_size = 96;
@@ -286,11 +293,16 @@ void radv_GetDescriptorSetLayoutSupport(VkDevice device,
                        descriptor_alignment = 32;
                        break;
                case VK_DESCRIPTOR_TYPE_SAMPLER:
-                       if 
(!has_equal_immutable_samplers(binding->pImmutableSamplers, 
binding->descriptorCount)) {
+                       if 
(!has_equal_immutable_samplers(binding->pImmutableSamplers, descriptor_count)) {
                                descriptor_size = 16;
                                descriptor_alignment = 16;
                        }
                        break;
+               case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+                       descriptor_alignment = 16;
+                       descriptor_size = descriptor_count;
+                       descriptor_count = 1;
+                       break;
                default:
                        unreachable("unknown descriptor type\n");
                        break;
@@ -301,18 +313,20 @@ void radv_GetDescriptorSetLayoutSupport(VkDevice device,
                }
                size = align_u64(size, descriptor_alignment);
 
-               uint64_t max_count = UINT64_MAX;
-               if (descriptor_size)
-                       max_count = (UINT64_MAX - size) / descriptor_size;
+               uint64_t max_count = INT32_MAX;
+               if (binding->descriptorType == 
VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
+                       max_count = INT32_MAX - size;
+               else if (descriptor_size)
+                       max_count = (INT32_MAX - size) / descriptor_size;
 
-               if (max_count < binding->descriptorCount) {
+               if (max_count < descriptor_count) {
                        supported = false;
                }
                if (variable_flags && binding->binding 
<variable_flags->bindingCount && variable_count &&
                    (variable_flags->pBindingFlags[binding->binding] & 
VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
                        variable_count->maxVariableDescriptorCount = 
MIN2(UINT32_MAX, max_count);
                }
-               size += binding->descriptorCount * descriptor_size;
+               size += descriptor_count * descriptor_size;
        }
 
        free(bindings);
@@ -536,6 +550,21 @@ VkResult radv_CreateDescriptorPool(
        int size = sizeof(struct radv_descriptor_pool);
        uint64_t bo_size = 0, bo_count = 0, range_count = 0;
 
+       vk_foreach_struct(ext, pCreateInfo->pNext) {
+               switch (ext->sType) {
+               case 
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT: {
+                       const struct 
VkDescriptorPoolInlineUniformBlockCreateInfoEXT *info =
+                               (const struct 
VkDescriptorPoolInlineUniformBlockCreateInfoEXT*)ext;
+                       /* the sizes are 4 aligned, and we need to align to at
+                        * most 32, which needs at most 28 bytes extra per
+                        * binding. */
+                       bo_size += 28llu * info->maxInlineUniformBlockBindings;
+                       break;
+               }
+               default:
+                       break;
+               }
+       }
 
        for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
                if (pCreateInfo->pPoolSizes[i].type != 
VK_DESCRIPTOR_TYPE_SAMPLER)
@@ -562,6 +591,9 @@ VkResult radv_CreateDescriptorPool(
                case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
                        bo_size += 96 * 
pCreateInfo->pPoolSizes[i].descriptorCount;
                        break;
+               case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+                       bo_size += pCreateInfo->pPoolSizes[i].descriptorCount;
+                       break;
                default:
                        unreachable("unknown descriptor type\n");
                        break;
@@ -756,6 +788,17 @@ static void write_buffer_descriptor(struct radv_device 
*device,
                *buffer_list = buffer->bo;
 }
 
+static void write_block_descriptor(struct radv_device *device,
+                                   struct radv_cmd_buffer *cmd_buffer,
+                                   void *dst,
+                                   const VkWriteDescriptorSet *writeset)
+{
+       const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub =
+               vk_find_struct_const(writeset->pNext, 
WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT);
+
+       memcpy(dst, inline_ub->pData, inline_ub->dataSize);
+}
+
 static void write_dynamic_buffer_descriptor(struct radv_device *device,
                                             struct radv_descriptor_range 
*range,
                                             struct radeon_winsys_bo 
**buffer_list,
@@ -854,6 +897,12 @@ void radv_update_descriptor_sets(
                const uint32_t *samplers = radv_immutable_samplers(set->layout, 
binding_layout);
 
                ptr += binding_layout->offset / 4;
+
+               if (writeset->descriptorType == 
VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+                       write_block_descriptor(device, cmd_buffer, 
(uint8_t*)ptr + writeset->dstArrayElement, writeset);
+                       continue;
+               }
+
                ptr += binding_layout->size * writeset->dstArrayElement / 4;
                buffer_list += binding_layout->buffer_offset;
                buffer_list += writeset->dstArrayElement;
@@ -1032,7 +1081,12 @@ VkResult radv_CreateDescriptorUpdateTemplate(VkDevice 
_device,
                        default:
                                break;
                        }
-                       dst_offset = binding_layout->offset / 4 + 
binding_layout->size * entry->dstArrayElement / 4;
+                       dst_offset = binding_layout->offset / 4;
+                       if (entry->descriptorType == 
VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
+                               dst_offset += entry->dstArrayElement / 4;
+                       else
+                               dst_offset += binding_layout->size * 
entry->dstArrayElement / 4;
+
                        dst_stride = binding_layout->size / 4;
                        break;
                }
@@ -1082,6 +1136,11 @@ void radv_update_descriptor_set_with_template(struct 
radv_device *device,
                const uint8_t *pSrc = ((const uint8_t *) pData) + 
templ->entry[i].src_offset;
                uint32_t j;
 
+               if (templ->entry[i].descriptor_type == 
VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+                       memcpy((uint8_t*)pDst, pSrc, 
templ->entry[i].descriptor_count);
+                       continue;
+               }
+
                for (j = 0; j < templ->entry[i].descriptor_count; ++j) {
                        switch (templ->entry[i].descriptor_type) {
                        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index f9169d9d012..99d1dcd3588 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -855,6 +855,14 @@ void radv_GetPhysicalDeviceFeatures2(
                        features->vertexAttributeInstanceRateZeroDivisor = 
VK_TRUE;
                        break;
                }
+               case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
+                       VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features 
=
+                               (VkPhysicalDeviceInlineUniformBlockFeaturesEXT 
*)ext;
+
+                       features->inlineUniformBlock = true;
+                       
features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
+                       break;
+               }
                default:
                        break;
                }
@@ -1150,7 +1158,8 @@ void radv_GetPhysicalDeviceProperties2(
                        properties->robustBufferAccessUpdateAfterBind = false;
                        properties->quadDivergentImplicitLod = false;
 
-                       size_t max_descriptor_set_size = ((1ull << 31) - 16 * 
MAX_DYNAMIC_BUFFERS) /
+                       size_t max_descriptor_set_size = ((1ull << 31) - 16 * 
MAX_DYNAMIC_BUFFERS -
+                               MAX_INLINE_UNIFORM_BLOCK_SIZE * 
MAX_INLINE_UNIFORM_BLOCK_COUNT) /
                                  (32 /* uniform buffer, 32 due to potential 
space wasted on alignment */ +
                                   32 /* storage buffer, 32 due to potential 
space wasted on alignment */ +
                                   32 /* sampler, largest when combined with 
image */ +
@@ -1193,6 +1202,17 @@ void radv_GetPhysicalDeviceProperties2(
                        properties->conservativeRasterizationPostDepthCoverage 
= VK_FALSE;
                        break;
                }
+               case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
+                       VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
+                               
(VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
+
+                       props->maxInlineUniformBlockSize = 
MAX_INLINE_UNIFORM_BLOCK_SIZE;
+                       props->maxPerStageDescriptorInlineUniformBlocks = 
MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
+                       
props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 
MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
+                       props->maxDescriptorSetInlineUniformBlocks = 
MAX_INLINE_UNIFORM_BLOCK_COUNT;
+                       
props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 
MAX_INLINE_UNIFORM_BLOCK_COUNT;
+                       break;
+               }
                default:
                        break;
                }
diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index 584926df390..980745420b4 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -103,6 +103,7 @@ EXTENSIONS = [
     Extension('VK_EXT_external_memory_dma_buf',           1, True),
     Extension('VK_EXT_external_memory_host',              1, 
'device->rad_info.has_userptr'),
     Extension('VK_EXT_global_priority',                   1, 
'device->rad_info.has_ctx_priority'),
+    Extension('VK_EXT_inline_uniform_block',              1, True),
     Extension('VK_EXT_sampler_filter_minmax',             1, 
'device->rad_info.chip_class >= CIK'),
     Extension('VK_EXT_shader_viewport_index_layer',       1, True),
     Extension('VK_EXT_shader_stencil_export',             1, True),
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 8b7113efd28..a8e8946a495 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -1222,15 +1222,38 @@ radv_load_resource(struct radv_shader_context *ctx, 
LLVMValueRef index,
        } else
                stride = LLVMConstInt(ctx->ac.i32, 
layout->binding[binding].size, false);
 
-       offset = ac_build_imad(&ctx->ac, index, stride,
-                              LLVMConstInt(ctx->ac.i32, base_offset, false));
+       offset = LLVMConstInt(ctx->ac.i32, base_offset, false);
+
+       if (layout->binding[binding].type != 
VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+               offset = ac_build_imad(&ctx->ac, index, stride, offset);
+       }
 
        desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset);
        desc_ptr = ac_cast_ptr(&ctx->ac, desc_ptr, ctx->ac.v4i32);
        LLVMSetMetadata(desc_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
 
-       LLVMValueRef desc = LLVMBuildLoad(ctx->ac.builder, desc_ptr, "");
-       LLVMSetMetadata(desc, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
+       LLVMValueRef desc;
+       if (layout->binding[binding].type == 
VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+               uint32_t desc_type = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+                       S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+                       S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+                       S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+                       S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                       S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+               LLVMValueRef desc_components[4] = {
+                       LLVMBuildBitCast(ctx->ac.builder, desc_ptr, 
ctx->ac.i32, ""),
+                       LLVMConstInt(ctx->ac.i32, 
S_008F04_BASE_ADDRESS_HI(ctx->options->address32_hi), false),
+                       /* High limit to support variable sizes. */
+                       LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
+                       LLVMConstInt(ctx->ac.i32, desc_type, false),
+               };
+
+               desc = ac_build_gather_values(&ctx->ac, desc_components, 4);
+       } else {
+               desc = LLVMBuildLoad(ctx->ac.builder, desc_ptr, "");
+               LLVMSetMetadata(desc, ctx->ac.invariant_load_md_kind, 
ctx->ac.empty_md);
+       }
 
        return desc;
 }
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index a649835959d..cd3f44e8a27 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -99,6 +99,8 @@ typedef uint32_t xcb_window_t;
 #define NUM_META_FS_KEYS 12
 #define RADV_MAX_DRM_DEVICES 8
 #define MAX_VIEWS        8
+#define MAX_INLINE_UNIFORM_BLOCK_SIZE (4ull * 1024 * 1024)
+#define MAX_INLINE_UNIFORM_BLOCK_COUNT 64
 
 #define NUM_DEPTH_CLEAR_PIPELINES 3
 
-- 
2.19.0

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to