So I have trouble making sense of what you changed, but on its own the patch looks good to me. r-b
On Tue, Apr 16, 2019 at 5:26 PM Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > From: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> > > Basically just reserve the memory in the descriptor sets. > > On the shader side we construct a buffer descriptor, since > AFAIU VGPR indexing on 32-bit pointers in LLVM is still broken. > > This fully supports update after bind and variable descriptor set > sizes. However, the limits are somewhat arbitrary and are mostly > about finding a reasonable division of a 2 GiB max memory size over > the set. > > v2: - rebased on top of master (Samuel) > - remove the loading resources rework (Samuel) > - only load UBO descriptors if it's a pointer (Samuel) > - use LLVMBuildPtrToInt to avoid IR failures (Samuel) > --- > src/amd/vulkan/radv_descriptor_set.c | 83 ++++++++++++++++++++++++---- > src/amd/vulkan/radv_device.c | 22 +++++++- > src/amd/vulkan/radv_extensions.py | 1 + > src/amd/vulkan/radv_nir_to_llvm.c | 31 ++++++++++- > src/amd/vulkan/radv_private.h | 2 + > 5 files changed, 124 insertions(+), 15 deletions(-) > > diff --git a/src/amd/vulkan/radv_descriptor_set.c > b/src/amd/vulkan/radv_descriptor_set.c > index 68171b5d244..6c6b88a4553 100644 > --- a/src/amd/vulkan/radv_descriptor_set.c > +++ b/src/amd/vulkan/radv_descriptor_set.c > @@ -127,6 +127,7 @@ VkResult radv_CreateDescriptorSetLayout( > uint32_t b = binding->binding; > uint32_t alignment; > unsigned binding_buffer_count = 0; > + uint32_t descriptor_count = binding->descriptorCount; > > switch (binding->descriptorType) { > case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: > @@ -164,6 +165,11 @@ VkResult radv_CreateDescriptorSetLayout( > set_layout->binding[b].size = 16; > alignment = 16; > break; > + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: > + alignment = 16; > + set_layout->binding[b].size = descriptor_count; > + descriptor_count = 1; > + break; > default: > unreachable("unknown descriptor type\n"); > break; > @@ -171,7 +177,7 @@ VkResult 
radv_CreateDescriptorSetLayout( > > set_layout->size = align(set_layout->size, alignment); > set_layout->binding[b].type = binding->descriptorType; > - set_layout->binding[b].array_size = binding->descriptorCount; > + set_layout->binding[b].array_size = descriptor_count; > set_layout->binding[b].offset = set_layout->size; > set_layout->binding[b].buffer_offset = buffer_count; > set_layout->binding[b].dynamic_offset_offset = > dynamic_offset_count; > @@ -207,9 +213,9 @@ VkResult radv_CreateDescriptorSetLayout( > samplers_offset += 4 * sizeof(uint32_t) * > binding->descriptorCount; > } > > - set_layout->size += binding->descriptorCount * > set_layout->binding[b].size; > - buffer_count += binding->descriptorCount * > binding_buffer_count; > - dynamic_offset_count += binding->descriptorCount * > + set_layout->size += descriptor_count * > set_layout->binding[b].size; > + buffer_count += descriptor_count * binding_buffer_count; > + dynamic_offset_count += descriptor_count * > set_layout->binding[b].dynamic_offset_count; > set_layout->shader_stages |= binding->stageFlags; > } > @@ -264,6 +270,7 @@ void radv_GetDescriptorSetLayoutSupport(VkDevice device, > > uint64_t descriptor_size = 0; > uint64_t descriptor_alignment = 1; > + uint32_t descriptor_count = binding->descriptorCount; > switch (binding->descriptorType) { > case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: > case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: > @@ -282,7 +289,7 @@ void radv_GetDescriptorSetLayoutSupport(VkDevice device, > descriptor_alignment = 32; > break; > case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: > - if > (!has_equal_immutable_samplers(binding->pImmutableSamplers, > binding->descriptorCount)) { > + if > (!has_equal_immutable_samplers(binding->pImmutableSamplers, > descriptor_count)) { > descriptor_size = 64; > } else { > descriptor_size = 96; > @@ -290,11 +297,16 @@ void radv_GetDescriptorSetLayoutSupport(VkDevice device, > descriptor_alignment = 32; > break; > case 
VK_DESCRIPTOR_TYPE_SAMPLER: > - if > (!has_equal_immutable_samplers(binding->pImmutableSamplers, > binding->descriptorCount)) { > + if > (!has_equal_immutable_samplers(binding->pImmutableSamplers, > descriptor_count)) { > descriptor_size = 16; > descriptor_alignment = 16; > } > break; > + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: > + descriptor_alignment = 16; > + descriptor_size = descriptor_count; > + descriptor_count = 1; > + break; > default: > unreachable("unknown descriptor type\n"); > break; > @@ -305,18 +317,20 @@ void radv_GetDescriptorSetLayoutSupport(VkDevice device, > } > size = align_u64(size, descriptor_alignment); > > - uint64_t max_count = UINT64_MAX; > - if (descriptor_size) > - max_count = (UINT64_MAX - size) / descriptor_size; > + uint64_t max_count = INT32_MAX; > + if (binding->descriptorType == > VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) > + max_count = INT32_MAX - size; > + else if (descriptor_size) > + max_count = (INT32_MAX - size) / descriptor_size; > > - if (max_count < binding->descriptorCount) { > + if (max_count < descriptor_count) { > supported = false; > } > if (variable_flags && binding->binding > <variable_flags->bindingCount && variable_count && > (variable_flags->pBindingFlags[binding->binding] & > VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) { > variable_count->maxVariableDescriptorCount = > MIN2(UINT32_MAX, max_count); > } > - size += binding->descriptorCount * descriptor_size; > + size += descriptor_count * descriptor_size; > } > > free(bindings); > @@ -543,6 +557,21 @@ VkResult radv_CreateDescriptorPool( > uint64_t size = sizeof(struct radv_descriptor_pool); > uint64_t bo_size = 0, bo_count = 0, range_count = 0; > > + vk_foreach_struct(ext, pCreateInfo->pNext) { > + switch (ext->sType) { > + case > VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT: { > + const struct > VkDescriptorPoolInlineUniformBlockCreateInfoEXT *info = > + (const struct > 
VkDescriptorPoolInlineUniformBlockCreateInfoEXT*)ext; > + /* the sizes are 4 aligned, and we need to align to at > + * most 32, which needs at most 28 bytes extra per > + * binding. */ > + bo_size += 28llu * > info->maxInlineUniformBlockBindings; > + break; > + } > + default: > + break; > + } > + } > > for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) { > if (pCreateInfo->pPoolSizes[i].type != > VK_DESCRIPTOR_TYPE_SAMPLER) > @@ -569,6 +598,9 @@ VkResult radv_CreateDescriptorPool( > case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: > bo_size += 96 * > pCreateInfo->pPoolSizes[i].descriptorCount; > break; > + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: > + bo_size += pCreateInfo->pPoolSizes[i].descriptorCount; > + break; > default: > unreachable("unknown descriptor type\n"); > break; > @@ -764,6 +796,17 @@ static void write_buffer_descriptor(struct radv_device > *device, > *buffer_list = buffer->bo; > } > > +static void write_block_descriptor(struct radv_device *device, > + struct radv_cmd_buffer *cmd_buffer, > + void *dst, > + const VkWriteDescriptorSet *writeset) > +{ > + const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub = > + vk_find_struct_const(writeset->pNext, > WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT); > + > + memcpy(dst, inline_ub->pData, inline_ub->dataSize); > +} > + > static void write_dynamic_buffer_descriptor(struct radv_device *device, > struct radv_descriptor_range > *range, > struct radeon_winsys_bo > **buffer_list, > @@ -862,6 +905,12 @@ void radv_update_descriptor_sets( > const uint32_t *samplers = > radv_immutable_samplers(set->layout, binding_layout); > > ptr += binding_layout->offset / 4; > + > + if (writeset->descriptorType == > VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { > + write_block_descriptor(device, cmd_buffer, > (uint8_t*)ptr + writeset->dstArrayElement, writeset); > + continue; > + } > + > ptr += binding_layout->size * writeset->dstArrayElement / 4; > buffer_list += binding_layout->buffer_offset; > 
buffer_list += writeset->dstArrayElement; > @@ -1042,7 +1091,12 @@ VkResult radv_CreateDescriptorUpdateTemplate(VkDevice > _device, > default: > break; > } > - dst_offset = binding_layout->offset / 4 + > binding_layout->size * entry->dstArrayElement / 4; > + dst_offset = binding_layout->offset / 4; > + if (entry->descriptorType == > VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) > + dst_offset += entry->dstArrayElement / 4; > + else > + dst_offset += binding_layout->size * > entry->dstArrayElement / 4; > + > dst_stride = binding_layout->size / 4; > break; > } > @@ -1092,6 +1146,11 @@ void radv_update_descriptor_set_with_template(struct > radv_device *device, > const uint8_t *pSrc = ((const uint8_t *) pData) + > templ->entry[i].src_offset; > uint32_t j; > > + if (templ->entry[i].descriptor_type == > VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { > + memcpy((uint8_t*)pDst, pSrc, > templ->entry[i].descriptor_count); > + continue; > + } > + > for (j = 0; j < templ->entry[i].descriptor_count; ++j) { > switch (templ->entry[i].descriptor_type) { > case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c > index 1f77dcadb17..db42d5d49b3 100644 > --- a/src/amd/vulkan/radv_device.c > +++ b/src/amd/vulkan/radv_device.c > @@ -906,6 +906,14 @@ void radv_GetPhysicalDeviceFeatures2( > features->shaderInt8 = true; > break; > } > + case > VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: { > + VkPhysicalDeviceInlineUniformBlockFeaturesEXT > *features = > + > (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext; > + > + features->inlineUniformBlock = true; > + > features->descriptorBindingInlineUniformBlockUpdateAfterBind = true; > + break; > + } > default: > break; > } > @@ -1200,7 +1208,8 @@ void radv_GetPhysicalDeviceProperties2( > properties->robustBufferAccessUpdateAfterBind = false; > properties->quadDivergentImplicitLod = false; > > - size_t max_descriptor_set_size = ((1ull << 31) - 16 * > 
MAX_DYNAMIC_BUFFERS) / > + size_t max_descriptor_set_size = ((1ull << 31) - 16 * > MAX_DYNAMIC_BUFFERS - > + MAX_INLINE_UNIFORM_BLOCK_SIZE * > MAX_INLINE_UNIFORM_BLOCK_COUNT) / > (32 /* uniform buffer, 32 due to potential > space wasted on alignment */ + > 32 /* storage buffer, 32 due to potential > space wasted on alignment */ + > 32 /* sampler, largest when combined with > image */ + > @@ -1288,6 +1297,17 @@ void radv_GetPhysicalDeviceProperties2( > properties->transformFeedbackDraw = true; > break; > } > + case > VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: { > + VkPhysicalDeviceInlineUniformBlockPropertiesEXT > *props = > + > (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext; > + > + props->maxInlineUniformBlockSize = > MAX_INLINE_UNIFORM_BLOCK_SIZE; > + props->maxPerStageDescriptorInlineUniformBlocks = > MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS; > + > props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = > MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS; > + props->maxDescriptorSetInlineUniformBlocks = > MAX_INLINE_UNIFORM_BLOCK_COUNT; > + > props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = > MAX_INLINE_UNIFORM_BLOCK_COUNT; > + break; > + } > default: > break; > } > diff --git a/src/amd/vulkan/radv_extensions.py > b/src/amd/vulkan/radv_extensions.py > index 13fe391e623..034d55df7c5 100644 > --- a/src/amd/vulkan/radv_extensions.py > +++ b/src/amd/vulkan/radv_extensions.py > @@ -111,6 +111,7 @@ EXTENSIONS = [ > Extension('VK_EXT_external_memory_host', 1, > 'device->rad_info.has_userptr'), > Extension('VK_EXT_global_priority', 1, > 'device->rad_info.has_ctx_priority'), > Extension('VK_EXT_host_query_reset', 1, True), > + Extension('VK_EXT_inline_uniform_block', 1, True), > Extension('VK_EXT_memory_budget', 1, True), > Extension('VK_EXT_memory_priority', 1, True), > Extension('VK_EXT_pci_bus_info', 2, True), > diff --git a/src/amd/vulkan/radv_nir_to_llvm.c > b/src/amd/vulkan/radv_nir_to_llvm.c > index 
58a3cf18fe1..5bc88298ee6 100644 > --- a/src/amd/vulkan/radv_nir_to_llvm.c > +++ b/src/amd/vulkan/radv_nir_to_llvm.c > @@ -1305,13 +1305,35 @@ radv_load_resource(struct ac_shader_abi *abi, > LLVMValueRef index, > } else > stride = LLVMConstInt(ctx->ac.i32, > layout->binding[binding].size, false); > > - offset = ac_build_imad(&ctx->ac, index, stride, > - LLVMConstInt(ctx->ac.i32, base_offset, false)); > + offset = LLVMConstInt(ctx->ac.i32, base_offset, false); > + > + if (layout->binding[binding].type != > VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { > + offset = ac_build_imad(&ctx->ac, index, stride, offset); > + } > > desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset); > desc_ptr = ac_cast_ptr(&ctx->ac, desc_ptr, ctx->ac.v4i32); > LLVMSetMetadata(desc_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); > > + if (layout->binding[binding].type == > VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { > + uint32_t desc_type = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | > + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | > + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | > + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | > + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | > + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); > + > + LLVMValueRef desc_components[4] = { > + LLVMBuildPtrToInt(ctx->ac.builder, desc_ptr, > ctx->ac.intptr, ""), > + LLVMConstInt(ctx->ac.i32, > S_008F04_BASE_ADDRESS_HI(ctx->options->address32_hi), false), > + /* High limit to support variable sizes. 
*/ > + LLVMConstInt(ctx->ac.i32, 0xffffffff, false), > + LLVMConstInt(ctx->ac.i32, desc_type, false), > + }; > + > + return ac_build_gather_values(&ctx->ac, desc_components, 4); > + } > + > return desc_ptr; > } > > @@ -1910,6 +1932,11 @@ static LLVMValueRef radv_load_ubo(struct ac_shader_abi > *abi, LLVMValueRef buffer > struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); > LLVMValueRef result; > > + if (LLVMGetTypeKind(LLVMTypeOf(buffer_ptr)) != LLVMPointerTypeKind) { > + /* Do not load the descriptor for inlined uniform blocks. */ > + return buffer_ptr; > + } > + > LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, > ctx->ac.empty_md); > > result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, ""); > diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h > index 82ab4eff8ca..cd3af7e614d 100644 > --- a/src/amd/vulkan/radv_private.h > +++ b/src/amd/vulkan/radv_private.h > @@ -102,6 +102,8 @@ typedef uint32_t xcb_window_t; > #define MAX_SO_STREAMS 4 > #define MAX_SO_BUFFERS 4 > #define MAX_SO_OUTPUTS 64 > +#define MAX_INLINE_UNIFORM_BLOCK_SIZE (4ull * 1024 * 1024) > +#define MAX_INLINE_UNIFORM_BLOCK_COUNT 64 > > #define NUM_DEPTH_CLEAR_PIPELINES 3 > > -- > 2.21.0 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev