Module: Mesa
Branch: main
Commit: 63278778c649bf373468d280742164b9a3c0a791
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=63278778c649bf373468d280742164b9a3c0a791

Author: Karmjit Mahil <[email protected]>
Date:   Mon Dec 12 15:45:17 2022 +0000

pvr: Add push consts support to descriptor program.

Signed-off-by: Karmjit Mahil <[email protected]>
Reviewed-by: Luigi Santivetti <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21387>

---

 src/imagination/vulkan/pds/pvr_pds.h        |  1 +
 src/imagination/vulkan/pvr_cmd_buffer.c     | 95 ++++++++++++++++++++++++++---
 src/imagination/vulkan/pvr_common.h         | 11 ++++
 src/imagination/vulkan/pvr_descriptor_set.c | 17 +++++-
 src/imagination/vulkan/pvr_pipeline.c       | 20 ++++--
 src/imagination/vulkan/pvr_private.h        |  6 ++
 6 files changed, 135 insertions(+), 15 deletions(-)

diff --git a/src/imagination/vulkan/pds/pvr_pds.h 
b/src/imagination/vulkan/pds/pvr_pds.h
index ccf95e05bb1..37c74856e4c 100644
--- a/src/imagination/vulkan/pds/pvr_pds.h
+++ b/src/imagination/vulkan/pds/pvr_pds.h
@@ -98,6 +98,7 @@ enum pvr_pds_vertex_attrib_program_type {
 
 enum pvr_pds_addr_literal_type {
    PVR_PDS_ADDR_LITERAL_DESC_SET_ADDRS_TABLE,
+   PVR_PDS_ADDR_LITERAL_PUSH_CONSTS,
 };
 
 /*****************************************************************************
diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c 
b/src/imagination/vulkan/pvr_cmd_buffer.c
index 8a95f7424db..7f0f7ad0d1a 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.c
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -2308,6 +2308,7 @@ void pvr_CmdPushConstants(VkCommandBuffer commandBuffer,
    memcpy(&state->push_constants.data[offset], pValues, size);
 
    state->push_constants.dirty_stages |= stageFlags;
+   state->push_constants.uploaded = false;
 }
 
 static VkResult
@@ -3278,6 +3279,21 @@ pvr_process_addr_literal(struct pvr_cmd_buffer 
*cmd_buffer,
       break;
    }
 
+   case PVR_PDS_ADDR_LITERAL_PUSH_CONSTS: {
+      const struct pvr_pipeline_layout *layout =
+         PVR_SELECT(cmd_buffer->state.gfx_pipeline->base.layout,
+                    cmd_buffer->state.gfx_pipeline->base.layout,
+                    cmd_buffer->state.compute_pipeline->base.layout);
+      const uint32_t push_constants_offset =
+         PVR_SELECT(layout->vert_push_constants_offset,
+                    layout->frag_push_constants_offset,
+                    layout->compute_push_constants_offset);
+
+      *addr_out = 
PVR_DEV_ADDR_OFFSET(cmd_buffer->state.push_constants.dev_addr,
+                                      push_constants_offset);
+      break;
+   }
+
    default:
       unreachable("Invalid add literal type.");
    }
@@ -3751,6 +3767,49 @@ static void pvr_compute_update_kernel(
    pvr_compute_generate_control_stream(csb, sub_cmd, &info);
 }
 
+static VkResult pvr_cmd_upload_push_consts(struct pvr_cmd_buffer *cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_bo *bo;
+   VkResult result;
+
+   /* TODO: Here are some possible optimizations/things to consider:
+    *
+    *    - Currently we upload maxPushConstantsSize. The application might only
+    *      be using a portion of that so we might end up with unused memory.
+    *      Should we be smarter about this. If we intend to upload the push
+    *      consts into shareds, we definitely want to do avoid reserving unused
+    *      regs.
+    *
+    *    - For now we have to upload to a new buffer each time since the 
shaders
+    *      access the push constants from memory. If we were to reuse the same
+    *      buffer we might update the contents out of sync with job submission
+    *      and the shaders will see the updated contents while the command
+    *      buffer was still being recorded and not yet submitted.
+    *      If we were to upload the push constants directly to shared regs we
+    *      could reuse the same buffer (avoiding extra allocation overhead)
+    *      since the contents will be DMAed only on job submission when the
+    *      control stream is processed and the PDS program is executed. This
+    *      approach would also allow us to avoid regenerating the PDS data
+    *      section in some cases since the buffer address will be constants.
+    */
+
+   if (cmd_buffer->state.push_constants.uploaded)
+      return VK_SUCCESS;
+
+   result = pvr_cmd_buffer_upload_general(cmd_buffer,
+                                          state->push_constants.data,
+                                          sizeof(state->push_constants.data),
+                                          &bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   cmd_buffer->state.push_constants.dev_addr = bo->vma->dev_addr;
+   cmd_buffer->state.push_constants.uploaded = true;
+
+   return VK_SUCCESS;
+}
+
 static void pvr_cmd_dispatch(
    struct pvr_cmd_buffer *const cmd_buffer,
    const pvr_dev_addr_t indirect_addr,
@@ -3759,8 +3818,6 @@ static void pvr_cmd_dispatch(
    struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
    const struct pvr_compute_pipeline *compute_pipeline =
       state->compute_pipeline;
-   const VkShaderStageFlags push_consts_stage_mask =
-      compute_pipeline->base.layout->push_constants_shader_stages;
    struct pvr_sub_cmd_compute *sub_cmd;
    VkResult result;
 
@@ -3770,11 +3827,15 @@ static void pvr_cmd_dispatch(
    sub_cmd->uses_atomic_ops |= compute_pipeline->shader_state.uses_atomic_ops;
    sub_cmd->uses_barrier |= compute_pipeline->shader_state.uses_barrier;
 
-   if (push_consts_stage_mask & VK_SHADER_STAGE_COMPUTE_BIT) {
-      /* TODO: Add a dirty push constants mask in the cmd_buffer state and
-       * check for dirty compute stage.
-       */
-      pvr_finishme("Add support for push constants.");
+   if (state->push_constants.dirty_stages & VK_SHADER_STAGE_COMPUTE_BIT) {
+      result = pvr_cmd_upload_push_consts(cmd_buffer);
+      if (result != VK_SUCCESS)
+         return;
+
+      /* Regenerate the PDS program to use the new push consts buffer. */
+      state->dirty.compute_desc_dirty = true;
+
+      state->push_constants.dirty_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT;
    }
 
    if (compute_pipeline->shader_state.uses_num_workgroups) {
@@ -5042,6 +5103,11 @@ pvr_ppp_state_update_required(const struct 
pvr_cmd_buffer *cmd_buffer)
    const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
    const struct PVRX(TA_STATE_HEADER) *const header = &state->emit_header;
 
+   /* For push constants we only need to worry if they are updated for the
+    * fragment stage since we're only updating the pds programs used in the
+    * fragment stage.
+    */
+
    return header->pres_ppp_ctrl || header->pres_ispctl ||
           header->pres_ispctl_fb || header->pres_ispctl_ba ||
           header->pres_ispctl_bb || header->pres_ispctl_dbsc ||
@@ -5053,6 +5119,7 @@ pvr_ppp_state_update_required(const struct pvr_cmd_buffer 
*cmd_buffer)
           header->pres_varying_word2 || header->pres_stream_out_program ||
           state->dirty.fragment_descriptors ||
           state->dirty.gfx_pipeline_binding || state->dirty.isp_userpass ||
+          state->push_constants.dirty_stages & VK_SHADER_STAGE_FRAGMENT_BIT ||
           BITSET_TEST(dynamic_dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) 
||
           BITSET_TEST(dynamic_dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
           BITSET_TEST(dynamic_dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
@@ -5451,7 +5518,11 @@ static VkResult pvr_validate_draw_state(struct 
pvr_cmd_buffer *cmd_buffer)
       pvr_setup_vertex_buffers(cmd_buffer, gfx_pipeline);
    }
 
-   /* TODO: Check for dirty push constants */
+   if (state->push_constants.dirty_stages & VK_SHADER_STAGE_ALL_GRAPHICS) {
+      result = pvr_cmd_upload_push_consts(cmd_buffer);
+      if (result != VK_SUCCESS)
+         return result;
+   }
 
    state->dirty.vertex_descriptors = state->dirty.gfx_pipeline_binding;
    state->dirty.fragment_descriptors = state->dirty.vertex_descriptors;
@@ -5468,6 +5539,12 @@ static VkResult pvr_validate_draw_state(struct 
pvr_cmd_buffer *cmd_buffer)
    if (BITSET_TEST(dynamic_state->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS))
       state->dirty.fragment_descriptors = true;
 
+   state->dirty.vertex_descriptors |=
+      state->push_constants.dirty_stages &
+      (VK_SHADER_STAGE_ALL_GRAPHICS & ~VK_SHADER_STAGE_FRAGMENT_BIT);
+   state->dirty.fragment_descriptors |= state->push_constants.dirty_stages &
+                                        VK_SHADER_STAGE_FRAGMENT_BIT;
+
    if (state->dirty.fragment_descriptors) {
       result = pvr_setup_descriptor_mappings(
          cmd_buffer,
@@ -5513,6 +5590,8 @@ static VkResult pvr_validate_draw_state(struct 
pvr_cmd_buffer *cmd_buffer)
    state->dirty.vertex_bindings = false;
    state->dirty.vis_test = false;
 
+   state->push_constants.dirty_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS;
+
    return VK_SUCCESS;
 }
 
diff --git a/src/imagination/vulkan/pvr_common.h 
b/src/imagination/vulkan/pvr_common.h
index 01cac1ed15a..3ffd4ede316 100644
--- a/src/imagination/vulkan/pvr_common.h
+++ b/src/imagination/vulkan/pvr_common.h
@@ -339,6 +339,14 @@ struct pvr_sh_reg_layout {
       bool present;
       uint32_t offset;
    } descriptor_set_addrs_table;
+
+   /* If this is present, it will always take up 2 sh regs in size and contain
+    * the device address of the push constants buffer.
+    */
+   struct {
+      bool present;
+      uint32_t offset;
+   } push_consts;
 };
 
 struct pvr_pipeline_layout {
@@ -349,6 +357,9 @@ struct pvr_pipeline_layout {
    struct pvr_descriptor_set_layout *set_layout[PVR_MAX_DESCRIPTOR_SETS];
 
    VkShaderStageFlags push_constants_shader_stages;
+   uint32_t vert_push_constants_offset;
+   uint32_t frag_push_constants_offset;
+   uint32_t compute_push_constants_offset;
 
    /* Mask of enum pvr_stage_allocation. */
    uint8_t shader_stage_mask;
diff --git a/src/imagination/vulkan/pvr_descriptor_set.c 
b/src/imagination/vulkan/pvr_descriptor_set.c
index fed709d3bae..5681ff864db 100644
--- a/src/imagination/vulkan/pvr_descriptor_set.c
+++ b/src/imagination/vulkan/pvr_descriptor_set.c
@@ -985,10 +985,25 @@ VkResult pvr_CreatePipelineLayout(VkDevice _device,
    }
 
    layout->push_constants_shader_stages = 0;
-   for (uint32_t i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
+   for (uint32_t i = 0; i < pCreateInfo->pushConstantRangeCount; i++) {
       const VkPushConstantRange *range = &pCreateInfo->pPushConstantRanges[i];
 
       layout->push_constants_shader_stages |= range->stageFlags;
+
+      /* From the Vulkan spec. 1.3.237
+       * VUID-VkPipelineLayoutCreateInfo-pPushConstantRanges-00292 :
+       *
+       *    "Any two elements of pPushConstantRanges must not include the same
+       *     stage in stageFlags"
+       */
+      if (range->stageFlags & VK_SHADER_STAGE_VERTEX_BIT)
+         layout->vert_push_constants_offset = range->offset;
+
+      if (range->stageFlags & VK_SHADER_STAGE_FRAGMENT_BIT)
+         layout->frag_push_constants_offset = range->offset;
+
+      if (range->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
+         layout->compute_push_constants_offset = range->offset;
    }
 
 #if defined(DEBUG)
diff --git a/src/imagination/vulkan/pvr_pipeline.c 
b/src/imagination/vulkan/pvr_pipeline.c
index a319c97b2e0..6c513db67d1 100644
--- a/src/imagination/vulkan/pvr_pipeline.c
+++ b/src/imagination/vulkan/pvr_pipeline.c
@@ -730,9 +730,13 @@ static VkResult 
pvr_pds_descriptor_program_create_and_upload(
          addr_literals++;
       }
 
-      /* TODO: Add support for other allocation types. E.g. blend constants
-       * and push constants.
-       */
+      if (sh_reg_layout->push_consts.present) {
+         program.addr_literals[addr_literals] = (struct pvr_pds_addr_literal){
+            .type = PVR_PDS_ADDR_LITERAL_PUSH_CONSTS,
+            .destination = sh_reg_layout->push_consts.offset,
+         };
+         addr_literals++;
+      }
 
       program.addr_literal_count = addr_literals;
    }
@@ -1105,9 +1109,13 @@ pvr_pipeline_alloc_shareds(const struct pvr_device 
*device,
       next_free_sh_reg += PVR_DEV_ADDR_SIZE_IN_SH_REGS;
    }
 
-   /* TODO: Add allocation for blend constants, push constants, and other 
buffer
-    * types.
-    */
+   reg_layout.push_consts.present =
+      !!(layout->push_constants_shader_stages & BITFIELD_BIT(stage));
+
+   if (reg_layout.push_consts.present) {
+      reg_layout.push_consts.offset = next_free_sh_reg;
+      next_free_sh_reg += PVR_DEV_ADDR_SIZE_IN_SH_REGS;
+   }
 
    *sh_reg_layout_out = reg_layout;
 
diff --git a/src/imagination/vulkan/pvr_private.h 
b/src/imagination/vulkan/pvr_private.h
index 0141e4bbcef..f4910b406ca 100644
--- a/src/imagination/vulkan/pvr_private.h
+++ b/src/imagination/vulkan/pvr_private.h
@@ -637,6 +637,12 @@ struct pvr_cmd_buffer_state {
    struct {
       uint8_t data[PVR_MAX_PUSH_CONSTANTS_SIZE];
       VkShaderStageFlags dirty_stages;
+      /* Indicates if the whole push constants buffer was uploaded. This avoids
+       * having to upload the same stuff twice when the push constant range
+       * covers both gfx and compute.
+       */
+      bool uploaded;
+      pvr_dev_addr_t dev_addr;
    } push_constants;
 
    /* Array size of barriers_needed is based on number of sync pipeline

Reply via email to