Module: Mesa
Branch: main
Commit: 4096bd8d8593480303e3cdd8a83b00ae71f740c1
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4096bd8d8593480303e3cdd8a83b00ae71f740c1

Author: Karmjit Mahil <[email protected]>
Date:   Tue Jun 13 11:20:08 2023 +0100

pvr: Setup ZLS depth and stencil load/store separately

Previously the code assumed that you could only have depth-stencil
attachments so no stencil only or depth only, for ZLS load/stores.
This isn't true as we can have stencil only attachments so the
ZLS depth and stencil store/load enable have to be set separately.

Other ZLSCTL setup has also been adjusted for separate depth-stencil.
E.g. the z{load,store}format, and {load,store}twiddled.

Co-Authored-By: Soroush Kashani <[email protected]>
Signed-off-by: Karmjit Mahil <[email protected]>
Signed-off-by: Soroush Kashani <[email protected]>
Reviewed-by: Frank Binns <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23830>

---

 src/imagination/csbgen/rogue_cr.xml           | 17 +++----
 src/imagination/vulkan/pvr_cmd_buffer.c       | 68 +++++++++++++++++++++++----
 src/imagination/vulkan/pvr_csb_enum_helpers.h | 38 +++++++++++++++
 src/imagination/vulkan/pvr_job_render.c       | 59 ++++++++---------------
 src/imagination/vulkan/pvr_job_render.h       |  8 ++--
 5 files changed, 127 insertions(+), 63 deletions(-)

diff --git a/src/imagination/csbgen/rogue_cr.xml 
b/src/imagination/csbgen/rogue_cr.xml
index 4a90cfaa2fd..803162a0f28 100644
--- a/src/imagination/csbgen/rogue_cr.xml
+++ b/src/imagination/csbgen/rogue_cr.xml
@@ -142,17 +142,14 @@ SOFTWARE.
     <value name="GAMMA_BOTH_CHANNELS"  value="1"/>
   </enum>
 
-  <enum name="ZLOADFORMAT_TYPE">
-    <value name="F32Z"     value="0"/>
-    <value name="24BITINT" value="1"/>
-    <value name="16BITINT" value="2"/>
-    <value name="F64Z"     value="3"/>
-  </enum>
-
-  <enum name="ZSTOREFORMAT_TYPE">
+  <enum name="ZLS_FORMAT_TYPE">
+    <!-- Separate depth and stencil -->
     <value name="F32Z"     value="0"/>
+    <!-- Packed depth and stencil -->
     <value name="24BITINT" value="1"/>
+    <!-- Separate depth and stencil -->
     <value name="16BITINT" value="2"/>
+    <!-- Packed depth and stencil -->
     <value name="F64Z"     value="3"/>
   </enum>
 
@@ -554,8 +551,8 @@ SOFTWARE.
     <field name="zlsextent_x_s" start="38" end="47" type="uint"/>
     <field name="stencil_extent_enable" start="37" end="37" type="bool"/>
     <field name="zlsextent_y_z" start="27" end="36" type="uint"/>
-    <field name="zstoreformat" start="25" end="26" type="ZSTOREFORMAT_TYPE"/>
-    <field name="zloadformat" start="23" end="24" type="ZLOADFORMAT_TYPE"/>
+    <field name="zstoreformat" start="25" end="26" type="ZLS_FORMAT_TYPE"/>
+    <field name="zloadformat" start="23" end="24" type="ZLS_FORMAT_TYPE"/>
     <field name="fb_storeen" start="22" end="22" type="bool"/>
     <field name="fb_loaden" start="21" end="21" type="bool"/>
     <field name="mstoreen" start="20" end="20" type="bool"/>
diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c 
b/src/imagination/vulkan/pvr_cmd_buffer.c
index a9dfa9933bd..afad1294ec0 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.c
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -1080,6 +1080,17 @@ pvr_pass_get_pixel_output_width(const struct 
pvr_render_pass *pass,
    return util_next_power_of_two(width);
 }
 
+static inline bool
+pvr_ds_attachment_requires_zls(const struct pvr_ds_attachment *attachment)
+{
+   bool zls_used;
+
+   zls_used = attachment->load.d || attachment->load.s;
+   zls_used |= attachment->store.d || attachment->store.s;
+
+   return zls_used;
+}
+
 /**
  * \brief If depth and/or stencil attachment dimensions are not tile-aligned,
  * then we may need to insert some additional transfer subcommands.
@@ -1119,7 +1130,7 @@ static bool pvr_sub_cmd_gfx_requires_ds_subtile_alignment(
 
    /* No ZLS functions enabled; nothing to do. */
    if ((!job->has_depth_attachment && !job->has_stencil_attachment) ||
-       (!job->ds.load && !job->ds.store)) {
+       !pvr_ds_attachment_requires_zls(&job->ds)) {
       return false;
    }
 
@@ -1151,7 +1162,7 @@ pvr_sub_cmd_gfx_align_ds_subtiles(struct pvr_cmd_buffer 
*const cmd_buffer,
    assert(list_last_entry(&cmd_buffer->sub_cmds, struct pvr_sub_cmd, link) ==
           prev_sub_cmd);
 
-   if (!ds->load && !ds->store)
+   if (!pvr_ds_attachment_requires_zls(ds))
       return VK_SUCCESS;
 
    rogue_get_zls_tile_size_xy(&cmd_buffer->device->pdevice->dev_info,
@@ -1203,7 +1214,7 @@ pvr_sub_cmd_gfx_align_ds_subtiles(struct pvr_cmd_buffer 
*const cmd_buffer,
       },
    };
 
-   if (ds->load) {
+   if (ds->load.d || ds->load.s) {
       cmd_buffer->state.current_sub_cmd = NULL;
 
       result =
@@ -1235,7 +1246,7 @@ pvr_sub_cmd_gfx_align_ds_subtiles(struct pvr_cmd_buffer 
*const cmd_buffer,
       cmd_buffer->state.current_sub_cmd = prev_sub_cmd;
    }
 
-   if (ds->store) {
+   if (ds->store.d || ds->store.s) {
       cmd_buffer->state.current_sub_cmd = NULL;
 
       result =
@@ -1501,7 +1512,24 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct 
pvr_device_info *dev_info,
                job->ds_clear_value.stencil = clear_values->stencil;
          }
 
-         job->ds.vk_format = ds_iview->vk.format;
+         switch (ds_iview->vk.format) {
+         case VK_FORMAT_D16_UNORM:
+            job->ds.zls_format = PVRX(CR_ZLS_FORMAT_TYPE_16BITINT);
+            break;
+
+         case VK_FORMAT_S8_UINT:
+         case VK_FORMAT_D32_SFLOAT:
+            job->ds.zls_format = PVRX(CR_ZLS_FORMAT_TYPE_F32Z);
+            break;
+
+         case VK_FORMAT_D24_UNORM_S8_UINT:
+            job->ds.zls_format = PVRX(CR_ZLS_FORMAT_TYPE_24BITINT);
+            break;
+
+         default:
+            unreachable("Unsupported depth stencil format");
+         }
+
          job->ds.memlayout = ds_image->memlayout;
 
          if (job->has_depth_attachment) {
@@ -1557,14 +1585,34 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct 
pvr_device_info *dev_info,
             }
          }
 
-         if (job->has_depth_attachment && job->has_stencil_attachment)
-            assert(d_store == s_store && d_load == s_load);
+         job->ds.load.d = d_load;
+         job->ds.load.s = s_load;
+         job->ds.store.d = d_store;
+         job->ds.store.s = s_store;
 
-         job->ds.store = d_store || s_store;
-         job->ds.load = d_load || s_load;
+         /* ZLS can't do masked writes for packed depth stencil formats so if
+          * we store anything, we have to store everything.
+          */
+         if ((job->ds.store.d || job->ds.store.s) &&
+             pvr_zls_format_type_is_packed(job->ds.zls_format)) {
+            job->ds.store.d = true;
+            job->ds.store.s = true;
+
+            /* In case we are only operating on one aspect of the attachment we
+             * need to load the unused one in order to preserve its contents 
due
+             * to the forced store which might otherwise corrupt it.
+             */
+            if (hw_render->depth_init != VK_ATTACHMENT_LOAD_OP_CLEAR)
+               job->ds.load.d = true;
 
-         if (job->ds.load || job->ds.store || store_was_optimised_out)
+            if (hw_render->stencil_init != VK_ATTACHMENT_LOAD_OP_CLEAR)
+               job->ds.load.s = true;
+         }
+
+         if (pvr_ds_attachment_requires_zls(&job->ds) ||
+             store_was_optimised_out) {
             job->process_empty_tiles = true;
+         }
 
          if (pvr_sub_cmd_gfx_requires_ds_subtile_alignment(dev_info, job)) {
             result = pvr_sub_cmd_gfx_align_ds_subtiles(cmd_buffer, sub_cmd);
diff --git a/src/imagination/vulkan/pvr_csb_enum_helpers.h 
b/src/imagination/vulkan/pvr_csb_enum_helpers.h
index 55c0dac1bae..893dcbee6d3 100644
--- a/src/imagination/vulkan/pvr_csb_enum_helpers.h
+++ b/src/imagination/vulkan/pvr_csb_enum_helpers.h
@@ -70,6 +70,44 @@ pvr_cr_isp_aa_mode_type(uint32_t samples)
    }
 }
 
+/* clang-format off */
+static inline bool
+pvr_zls_format_type_is_packed(enum PVRX(CR_ZLS_FORMAT_TYPE) type)
+/* clang-format on */
+{
+   switch (type) {
+   case PVRX(CR_ZLS_FORMAT_TYPE_24BITINT):
+   case PVRX(CR_ZLS_FORMAT_TYPE_F64Z):
+      return true;
+
+   case PVRX(CR_ZLS_FORMAT_TYPE_F32Z):
+   case PVRX(CR_ZLS_FORMAT_TYPE_16BITINT):
+      return false;
+
+   default:
+      unreachable("Invalid ZLS format type");
+   }
+}
+
+/* clang-format off */
+static inline bool
+pvr_zls_format_type_is_int(enum PVRX(CR_ZLS_FORMAT_TYPE) type)
+/* clang-format on */
+{
+   switch (type) {
+   case PVRX(CR_ZLS_FORMAT_TYPE_24BITINT):
+   case PVRX(CR_ZLS_FORMAT_TYPE_16BITINT):
+      return true;
+
+   case PVRX(CR_ZLS_FORMAT_TYPE_F32Z):
+   case PVRX(CR_ZLS_FORMAT_TYPE_F64Z):
+      return false;
+
+   default:
+      unreachable("Invalid ZLS format type");
+   }
+}
+
 /******************************************************************************
    PDS
  
******************************************************************************/
diff --git a/src/imagination/vulkan/pvr_job_render.c 
b/src/imagination/vulkan/pvr_job_render.c
index 7f8d1697f94..9e661ccf3ba 100644
--- a/src/imagination/vulkan/pvr_job_render.c
+++ b/src/imagination/vulkan/pvr_job_render.c
@@ -1064,8 +1064,8 @@ static void pvr_frag_state_stream_init(struct 
pvr_render_ctx *ctx,
    const enum PVRX(CR_ISP_AA_MODE_TYPE)
       isp_aa_mode = pvr_cr_isp_aa_mode_type(job->samples);
 
+   enum PVRX(CR_ZLS_FORMAT_TYPE) zload_format = PVRX(CR_ZLS_FORMAT_TYPE_F32Z);
    uint32_t *stream_ptr = (uint32_t *)state->fw_stream;
-   enum PVRX(CR_ZLOADFORMAT_TYPE) zload_format;
    uint32_t pixel_ctl;
    uint32_t isp_ctl;
 
@@ -1101,7 +1101,7 @@ static void pvr_frag_state_stream_init(struct 
pvr_render_ctx *ctx,
    stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
 
    pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLSCTL, value) {
-      if (job->has_depth_attachment) {
+      if (job->has_depth_attachment || job->has_stencil_attachment) {
          uint32_t alignment_x;
          uint32_t alignment_y;
 
@@ -1129,44 +1129,24 @@ static void pvr_frag_state_stream_init(struct 
pvr_render_ctx *ctx,
             value.storetwiddled = true;
          }
 
-         switch (job->ds.vk_format) {
-         case VK_FORMAT_D16_UNORM:
-            value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_16BITINT);
-            value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_16BITINT);
-            break;
-
-         case VK_FORMAT_D32_SFLOAT:
-            value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_F32Z);
-            value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z);
-            break;
-
-         case VK_FORMAT_D24_UNORM_S8_UINT:
-            value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_24BITINT);
-            value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_24BITINT);
-            break;
-
-         default:
-            unreachable("Unsupported depth format");
-         }
-
-         value.zloaden = job->ds.load;
-         value.forcezload = value.zloaden;
-
-         value.zstoreen = job->ds.store;
-         value.forcezstore = value.zstoreen;
+         value.zloadformat = job->ds.zls_format;
+         value.zstoreformat = job->ds.zls_format;
 
          zload_format = value.zloadformat;
-      } else {
-         zload_format = PVRX(CR_ZLOADFORMAT_TYPE_F32Z);
       }
 
-      if (job->has_stencil_attachment) {
-         value.sstoreen = job->ds.store;
-         value.forcezstore = value.sstoreen;
+      if (job->has_depth_attachment) {
+         value.zloaden = job->ds.load.d;
+         value.zstoreen = job->ds.store.d;
+      }
 
-         value.sloaden = job->ds.load;
-         value.forcezload = value.sloaden;
+      if (job->has_stencil_attachment) {
+         value.sloaden = job->ds.load.s;
+         value.sstoreen = job->ds.store.s;
       }
+
+      value.forcezload = value.zloaden || value.sloaden;
+      value.forcezstore = value.zstoreen || value.sstoreen;
    }
    stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);
 
@@ -1184,7 +1164,7 @@ static void pvr_frag_state_stream_init(struct 
pvr_render_ctx *ctx,
           * in CR_ISP_STENCIL_LOAD_BASE does not contain a depth component.
           */
          assert(job->has_depth_attachment ||
-                job->ds.vk_format == VK_FORMAT_S8_UINT);
+                !pvr_zls_format_type_is_packed(job->ds.zls_format));
          value.enable = !job->has_depth_attachment;
       }
    }
@@ -1236,15 +1216,15 @@ static void pvr_frag_state_stream_init(struct 
pvr_render_ctx *ctx,
        *  - job->depth_clear_value is set to a sensible default in that case.
        */
       switch (zload_format) {
-      case PVRX(CR_ZLOADFORMAT_TYPE_F32Z):
+      case PVRX(CR_ZLS_FORMAT_TYPE_F32Z):
          value.value = fui(depth_clear);
          break;
 
-      case PVRX(CR_ZLOADFORMAT_TYPE_16BITINT):
+      case PVRX(CR_ZLS_FORMAT_TYPE_16BITINT):
          value.value = _mesa_float_to_unorm(depth_clear, 16);
          break;
 
-      case PVRX(CR_ZLOADFORMAT_TYPE_24BITINT):
+      case PVRX(CR_ZLS_FORMAT_TYPE_24BITINT):
          value.value = _mesa_float_to_unorm(depth_clear, 24);
          break;
 
@@ -1277,8 +1257,7 @@ static void pvr_frag_state_stream_init(struct 
pvr_render_ctx *ctx,
        * bias factor of 1.0 equates to 1 ULP of increase to the depth value.
        */
       value.dbias_is_int = PVR_HAS_ERN(dev_info, 42307) &&
-                           (job->ds.vk_format == VK_FORMAT_D16_UNORM ||
-                            job->ds.vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
+                           pvr_zls_format_type_is_int(job->ds.zls_format);
    }
    /* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be
     * possible to fully pack CR_ISP_CTL above rather than having to OR in part
diff --git a/src/imagination/vulkan/pvr_job_render.h 
b/src/imagination/vulkan/pvr_job_render.h
index a214d5e5239..b7c25b10d3a 100644
--- a/src/imagination/vulkan/pvr_job_render.h
+++ b/src/imagination/vulkan/pvr_job_render.h
@@ -97,15 +97,17 @@ struct pvr_render_job {
     * has_stencil_attachment are false, the contents are undefined.
     */
    struct pvr_ds_attachment {
-      bool load;
-      bool store;
+      struct {
+         bool d : 1;
+         bool s : 1;
+      } load, store;
 
       pvr_dev_addr_t addr;
       uint32_t stride;
       uint32_t height;
       VkExtent2D physical_extent;
       uint32_t layer_size;
-      VkFormat vk_format;
+      enum PVRX(CR_ZLS_FORMAT_TYPE) zls_format;
       /* FIXME: This should be of type 'enum pvr_memlayout', but this is 
defined
        * in pvr_private.h, which causes a circular include dependency. For now,
        * treat it as a uint32_t. A couple of ways to possibly fix this:

Reply via email to