Module: Mesa Branch: main Commit: 63278778c649bf373468d280742164b9a3c0a791 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=63278778c649bf373468d280742164b9a3c0a791
Author: Karmjit Mahil <[email protected]> Date: Mon Dec 12 15:45:17 2022 +0000 pvr: Add push consts support to descriptor program. Signed-off-by: Karmjit Mahil <[email protected]> Reviewed-by: Luigi Santivetti <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21387> --- src/imagination/vulkan/pds/pvr_pds.h | 1 + src/imagination/vulkan/pvr_cmd_buffer.c | 95 ++++++++++++++++++++++++++--- src/imagination/vulkan/pvr_common.h | 11 ++++ src/imagination/vulkan/pvr_descriptor_set.c | 17 +++++- src/imagination/vulkan/pvr_pipeline.c | 20 ++++-- src/imagination/vulkan/pvr_private.h | 6 ++ 6 files changed, 135 insertions(+), 15 deletions(-) diff --git a/src/imagination/vulkan/pds/pvr_pds.h b/src/imagination/vulkan/pds/pvr_pds.h index ccf95e05bb1..37c74856e4c 100644 --- a/src/imagination/vulkan/pds/pvr_pds.h +++ b/src/imagination/vulkan/pds/pvr_pds.h @@ -98,6 +98,7 @@ enum pvr_pds_vertex_attrib_program_type { enum pvr_pds_addr_literal_type { PVR_PDS_ADDR_LITERAL_DESC_SET_ADDRS_TABLE, + PVR_PDS_ADDR_LITERAL_PUSH_CONSTS, }; /***************************************************************************** diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index 8a95f7424db..7f0f7ad0d1a 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -2308,6 +2308,7 @@ void pvr_CmdPushConstants(VkCommandBuffer commandBuffer, memcpy(&state->push_constants.data[offset], pValues, size); state->push_constants.dirty_stages |= stageFlags; + state->push_constants.uploaded = false; } static VkResult @@ -3278,6 +3279,21 @@ pvr_process_addr_literal(struct pvr_cmd_buffer *cmd_buffer, break; } + case PVR_PDS_ADDR_LITERAL_PUSH_CONSTS: { + const struct pvr_pipeline_layout *layout = + PVR_SELECT(cmd_buffer->state.gfx_pipeline->base.layout, + cmd_buffer->state.gfx_pipeline->base.layout, + cmd_buffer->state.compute_pipeline->base.layout); + const uint32_t push_constants_offset = + PVR_SELECT(layout->vert_push_constants_offset, + layout->frag_push_constants_offset, + layout->compute_push_constants_offset); + + *addr_out = PVR_DEV_ADDR_OFFSET(cmd_buffer->state.push_constants.dev_addr, + push_constants_offset); + break; + } + default: unreachable("Invalid add literal type."); } @@ -3751,6 +3767,49 @@ static void pvr_compute_update_kernel( pvr_compute_generate_control_stream(csb, sub_cmd, &info); } +static VkResult pvr_cmd_upload_push_consts(struct pvr_cmd_buffer *cmd_buffer) +{ + struct pvr_cmd_buffer_state *state = &cmd_buffer->state; + struct pvr_bo *bo; + VkResult result; + + /* TODO: Here are some possible optimizations/things to consider: + * + * - Currently we upload maxPushConstantsSize. The application might only + * be using a portion of that so we might end up with unused memory. + * Should we be smarter about this. If we intend to upload the push + * consts into shareds, we definitely want to do avoid reserving unused + * regs. + * + * - For now we have to upload to a new buffer each time since the shaders + * access the push constants from memory. If we were to reuse the same + * buffer we might update the contents out of sync with job submission + * and the shaders will see the updated contents while the command + * buffer was still being recorded and not yet submitted. + * If we were to upload the push constants directly to shared regs we + * could reuse the same buffer (avoiding extra allocation overhead) + * since the contents will be DMAed only on job submission when the + * control stream is processed and the PDS program is executed. This + * approach would also allow us to avoid regenerating the PDS data + * section in some cases since the buffer address will be constants. + */ + + if (cmd_buffer->state.push_constants.uploaded) + return VK_SUCCESS; + + result = pvr_cmd_buffer_upload_general(cmd_buffer, + state->push_constants.data, + sizeof(state->push_constants.data), + &bo); + if (result != VK_SUCCESS) + return result; + + cmd_buffer->state.push_constants.dev_addr = bo->vma->dev_addr; + cmd_buffer->state.push_constants.uploaded = true; + + return VK_SUCCESS; +} + static void pvr_cmd_dispatch( struct pvr_cmd_buffer *const cmd_buffer, const pvr_dev_addr_t indirect_addr, @@ -3759,8 +3818,6 @@ static void pvr_cmd_dispatch( struct pvr_cmd_buffer_state *state = &cmd_buffer->state; const struct pvr_compute_pipeline *compute_pipeline = state->compute_pipeline; - const VkShaderStageFlags push_consts_stage_mask = - compute_pipeline->base.layout->push_constants_shader_stages; struct pvr_sub_cmd_compute *sub_cmd; VkResult result; @@ -3770,11 +3827,15 @@ static void pvr_cmd_dispatch( sub_cmd->uses_atomic_ops |= compute_pipeline->shader_state.uses_atomic_ops; sub_cmd->uses_barrier |= compute_pipeline->shader_state.uses_barrier; - if (push_consts_stage_mask & VK_SHADER_STAGE_COMPUTE_BIT) { - /* TODO: Add a dirty push constants mask in the cmd_buffer state and - * check for dirty compute stage. - */ - pvr_finishme("Add support for push constants."); + if (state->push_constants.dirty_stages & VK_SHADER_STAGE_COMPUTE_BIT) { + result = pvr_cmd_upload_push_consts(cmd_buffer); + if (result != VK_SUCCESS) + return; + + /* Regenerate the PDS program to use the new push consts buffer. */ + state->dirty.compute_desc_dirty = true; + + state->push_constants.dirty_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT; } if (compute_pipeline->shader_state.uses_num_workgroups) { @@ -5042,6 +5103,11 @@ pvr_ppp_state_update_required(const struct pvr_cmd_buffer *cmd_buffer) const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; const struct PVRX(TA_STATE_HEADER) *const header = &state->emit_header; + /* For push constants we only need to worry if they are updated for the + * fragment stage since we're only updating the pds programs used in the + * fragment stage. + */ + return header->pres_ppp_ctrl || header->pres_ispctl || header->pres_ispctl_fb || header->pres_ispctl_ba || header->pres_ispctl_bb || header->pres_ispctl_dbsc || @@ -5053,6 +5119,7 @@ pvr_ppp_state_update_required(const struct pvr_cmd_buffer *cmd_buffer) header->pres_varying_word2 || header->pres_stream_out_program || state->dirty.fragment_descriptors || state->dirty.gfx_pipeline_binding || state->dirty.isp_userpass || + state->push_constants.dirty_stages & VK_SHADER_STAGE_FRAGMENT_BIT || BITSET_TEST(dynamic_dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) || BITSET_TEST(dynamic_dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) || BITSET_TEST(dynamic_dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) || @@ -5451,7 +5518,11 @@ static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer) pvr_setup_vertex_buffers(cmd_buffer, gfx_pipeline); } - /* TODO: Check for dirty push constants */ + if (state->push_constants.dirty_stages & VK_SHADER_STAGE_ALL_GRAPHICS) { + result = pvr_cmd_upload_push_consts(cmd_buffer); + if (result != VK_SUCCESS) + return result; + } state->dirty.vertex_descriptors = state->dirty.gfx_pipeline_binding; state->dirty.fragment_descriptors = state->dirty.vertex_descriptors; @@ -5468,6 +5539,12 @@ static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer) if (BITSET_TEST(dynamic_state->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) state->dirty.fragment_descriptors = true; + state->dirty.vertex_descriptors |= + state->push_constants.dirty_stages & + (VK_SHADER_STAGE_ALL_GRAPHICS & ~VK_SHADER_STAGE_FRAGMENT_BIT); + state->dirty.fragment_descriptors |= state->push_constants.dirty_stages & + VK_SHADER_STAGE_FRAGMENT_BIT; + if (state->dirty.fragment_descriptors) { result = pvr_setup_descriptor_mappings( cmd_buffer, @@ -5513,6 +5590,8 @@ static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer) state->dirty.vertex_bindings = false; state->dirty.vis_test = false; + state->push_constants.dirty_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS; + return VK_SUCCESS; } diff --git a/src/imagination/vulkan/pvr_common.h b/src/imagination/vulkan/pvr_common.h index 01cac1ed15a..3ffd4ede316 100644 --- a/src/imagination/vulkan/pvr_common.h +++ b/src/imagination/vulkan/pvr_common.h @@ -339,6 +339,14 @@ struct pvr_sh_reg_layout { bool present; uint32_t offset; } descriptor_set_addrs_table; + + /* If this is present, it will always take up 2 sh regs in size and contain + * the device address of the push constants buffer. + */ + struct { + bool present; + uint32_t offset; + } push_consts; }; struct pvr_pipeline_layout { @@ -349,6 +357,9 @@ struct pvr_pipeline_layout { struct pvr_descriptor_set_layout *set_layout[PVR_MAX_DESCRIPTOR_SETS]; VkShaderStageFlags push_constants_shader_stages; + uint32_t vert_push_constants_offset; + uint32_t frag_push_constants_offset; + uint32_t compute_push_constants_offset; /* Mask of enum pvr_stage_allocation. */ uint8_t shader_stage_mask; diff --git a/src/imagination/vulkan/pvr_descriptor_set.c b/src/imagination/vulkan/pvr_descriptor_set.c index fed709d3bae..5681ff864db 100644 --- a/src/imagination/vulkan/pvr_descriptor_set.c +++ b/src/imagination/vulkan/pvr_descriptor_set.c @@ -985,10 +985,25 @@ VkResult pvr_CreatePipelineLayout(VkDevice _device, } layout->push_constants_shader_stages = 0; - for (uint32_t i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) { + for (uint32_t i = 0; i < pCreateInfo->pushConstantRangeCount; i++) { const VkPushConstantRange *range = &pCreateInfo->pPushConstantRanges[i]; layout->push_constants_shader_stages |= range->stageFlags; + + /* From the Vulkan spec. 1.3.237 + * VUID-VkPipelineLayoutCreateInfo-pPushConstantRanges-00292 : + * + * "Any two elements of pPushConstantRanges must not include the same + * stage in stageFlags" + */ + if (range->stageFlags & VK_SHADER_STAGE_VERTEX_BIT) + layout->vert_push_constants_offset = range->offset; + + if (range->stageFlags & VK_SHADER_STAGE_FRAGMENT_BIT) + layout->frag_push_constants_offset = range->offset; + + if (range->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) + layout->compute_push_constants_offset = range->offset; } #if defined(DEBUG) diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c index a319c97b2e0..6c513db67d1 100644 --- a/src/imagination/vulkan/pvr_pipeline.c +++ b/src/imagination/vulkan/pvr_pipeline.c @@ -730,9 +730,13 @@ static VkResult pvr_pds_descriptor_program_create_and_upload( addr_literals++; } - /* TODO: Add support for other allocation types. E.g. blend constants - * and push constants. - */ + if (sh_reg_layout->push_consts.present) { + program.addr_literals[addr_literals] = (struct pvr_pds_addr_literal){ + .type = PVR_PDS_ADDR_LITERAL_PUSH_CONSTS, + .destination = sh_reg_layout->push_consts.offset, + }; + addr_literals++; + } program.addr_literal_count = addr_literals; } @@ -1105,9 +1109,13 @@ pvr_pipeline_alloc_shareds(const struct pvr_device *device, next_free_sh_reg += PVR_DEV_ADDR_SIZE_IN_SH_REGS; } - /* TODO: Add allocation for blend constants, push constants, and other buffer - * types. - */ + reg_layout.push_consts.present = + !!(layout->push_constants_shader_stages & BITFIELD_BIT(stage)); + + if (reg_layout.push_consts.present) { + reg_layout.push_consts.offset = next_free_sh_reg; + next_free_sh_reg += PVR_DEV_ADDR_SIZE_IN_SH_REGS; + } *sh_reg_layout_out = reg_layout; diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index 0141e4bbcef..f4910b406ca 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -637,6 +637,12 @@ struct pvr_cmd_buffer_state { struct { uint8_t data[PVR_MAX_PUSH_CONSTANTS_SIZE]; VkShaderStageFlags dirty_stages; + /* Indicates if the whole push constants buffer was uploaded. This avoids + * having to upload the same stuff twice when the push constant range + * covers both gfx and compute. + */ + bool uploaded; + pvr_dev_addr_t dev_addr; } push_constants; /* Array size of barriers_needed is based on number of sync pipeline
