Module: Mesa Branch: main Commit: 4096bd8d8593480303e3cdd8a83b00ae71f740c1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4096bd8d8593480303e3cdd8a83b00ae71f740c1
Author: Karmjit Mahil <[email protected]> Date: Tue Jun 13 11:20:08 2023 +0100 pvr: Setup ZLS depth and stencil load/store separately Previously the code assumed that you could only have depth-stencil attachments so no stencil only or depth only, for ZLS load/stores. This isn't true as we can have stencil only attachments so the ZLS depth and stencil store/load enable have to be set separately. Other ZLSCTL setup has also been adjusted for separate depth-stencil. E.g. the z{load,store}format, and {load,store}twiddled. Co-Authored-By: Soroush Kashani <[email protected]> Signed-off-by: Karmjit Mahil <[email protected]> Signed-off-by: Soroush Kashani <[email protected]> Reviewed-by: Frank Binns <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23830> --- src/imagination/csbgen/rogue_cr.xml | 17 +++---- src/imagination/vulkan/pvr_cmd_buffer.c | 68 +++++++++++++++++++++++---- src/imagination/vulkan/pvr_csb_enum_helpers.h | 38 +++++++++++++++ src/imagination/vulkan/pvr_job_render.c | 59 ++++++++--------------- src/imagination/vulkan/pvr_job_render.h | 8 ++-- 5 files changed, 127 insertions(+), 63 deletions(-) diff --git a/src/imagination/csbgen/rogue_cr.xml b/src/imagination/csbgen/rogue_cr.xml index 4a90cfaa2fd..803162a0f28 100644 --- a/src/imagination/csbgen/rogue_cr.xml +++ b/src/imagination/csbgen/rogue_cr.xml @@ -142,17 +142,14 @@ SOFTWARE. <value name="GAMMA_BOTH_CHANNELS" value="1"/> </enum> - <enum name="ZLOADFORMAT_TYPE"> - <value name="F32Z" value="0"/> - <value name="24BITINT" value="1"/> - <value name="16BITINT" value="2"/> - <value name="F64Z" value="3"/> - </enum> - - <enum name="ZSTOREFORMAT_TYPE"> + <enum name="ZLS_FORMAT_TYPE"> + <!-- Separate depth and stencil --> <value name="F32Z" value="0"/> + <!-- Packed depth and stencil --> <value name="24BITINT" value="1"/> + <!-- Separate depth and stencil --> <value name="16BITINT" value="2"/> + <!-- Packed depth and stencil --> <value name="F64Z" value="3"/> </enum> @@ -554,8 +551,8 @@ SOFTWARE. <field name="zlsextent_x_s" start="38" end="47" type="uint"/> <field name="stencil_extent_enable" start="37" end="37" type="bool"/> <field name="zlsextent_y_z" start="27" end="36" type="uint"/> - <field name="zstoreformat" start="25" end="26" type="ZSTOREFORMAT_TYPE"/> - <field name="zloadformat" start="23" end="24" type="ZLOADFORMAT_TYPE"/> + <field name="zstoreformat" start="25" end="26" type="ZLS_FORMAT_TYPE"/> + <field name="zloadformat" start="23" end="24" type="ZLS_FORMAT_TYPE"/> <field name="fb_storeen" start="22" end="22" type="bool"/> <field name="fb_loaden" start="21" end="21" type="bool"/> <field name="mstoreen" start="20" end="20" type="bool"/> diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index a9dfa9933bd..afad1294ec0 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -1080,6 +1080,17 @@ pvr_pass_get_pixel_output_width(const struct pvr_render_pass *pass, return util_next_power_of_two(width); } +static inline bool +pvr_ds_attachment_requires_zls(const struct pvr_ds_attachment *attachment) +{ + bool zls_used; + + zls_used = attachment->load.d || attachment->load.s; + zls_used |= attachment->store.d || attachment->store.s; + + return zls_used; +} + /** * \brief If depth and/or stencil attachment dimensions are not tile-aligned, * then we may need to insert some additional transfer subcommands. @@ -1119,7 +1130,7 @@ static bool pvr_sub_cmd_gfx_requires_ds_subtile_alignment( /* No ZLS functions enabled; nothing to do. */ if ((!job->has_depth_attachment && !job->has_stencil_attachment) || - (!job->ds.load && !job->ds.store)) { + !pvr_ds_attachment_requires_zls(&job->ds)) { return false; } @@ -1151,7 +1162,7 @@ pvr_sub_cmd_gfx_align_ds_subtiles(struct pvr_cmd_buffer *const cmd_buffer, assert(list_last_entry(&cmd_buffer->sub_cmds, struct pvr_sub_cmd, link) == prev_sub_cmd); - if (!ds->load && !ds->store) + if (!pvr_ds_attachment_requires_zls(ds)) return VK_SUCCESS; rogue_get_zls_tile_size_xy(&cmd_buffer->device->pdevice->dev_info, @@ -1203,7 +1214,7 @@ pvr_sub_cmd_gfx_align_ds_subtiles(struct pvr_cmd_buffer *const cmd_buffer, }, }; - if (ds->load) { + if (ds->load.d || ds->load.s) { cmd_buffer->state.current_sub_cmd = NULL; result = @@ -1235,7 +1246,7 @@ pvr_sub_cmd_gfx_align_ds_subtiles(struct pvr_cmd_buffer *const cmd_buffer, cmd_buffer->state.current_sub_cmd = prev_sub_cmd; } - if (ds->store) { + if (ds->store.d || ds->store.s) { cmd_buffer->state.current_sub_cmd = NULL; result = @@ -1501,7 +1512,24 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, job->ds_clear_value.stencil = clear_values->stencil; } - job->ds.vk_format = ds_iview->vk.format; + switch (ds_iview->vk.format) { + case VK_FORMAT_D16_UNORM: + job->ds.zls_format = PVRX(CR_ZLS_FORMAT_TYPE_16BITINT); + break; + + case VK_FORMAT_S8_UINT: + case VK_FORMAT_D32_SFLOAT: + job->ds.zls_format = PVRX(CR_ZLS_FORMAT_TYPE_F32Z); + break; + + case VK_FORMAT_D24_UNORM_S8_UINT: + job->ds.zls_format = PVRX(CR_ZLS_FORMAT_TYPE_24BITINT); + break; + + default: + unreachable("Unsupported depth stencil format"); + } + job->ds.memlayout = ds_image->memlayout; if (job->has_depth_attachment) { @@ -1557,14 +1585,34 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, } } - if (job->has_depth_attachment && job->has_stencil_attachment) - assert(d_store == s_store && d_load == s_load); + job->ds.load.d = d_load; + job->ds.load.s = s_load; + job->ds.store.d = d_store; + job->ds.store.s = s_store; - job->ds.store = d_store || s_store; - job->ds.load = d_load || s_load; + /* ZLS can't do masked writes for packed depth stencil formats so if + * we store anything, we have to store everything. + */ + if ((job->ds.store.d || job->ds.store.s) && + pvr_zls_format_type_is_packed(job->ds.zls_format)) { + job->ds.store.d = true; + job->ds.store.s = true; + + /* In case we are only operating on one aspect of the attachment we + * need to load the unused one in order to preserve its contents due + * to the forced store which might otherwise corrupt it. + */ + if (hw_render->depth_init != VK_ATTACHMENT_LOAD_OP_CLEAR) + job->ds.load.d = true; - if (job->ds.load || job->ds.store || store_was_optimised_out) + if (hw_render->stencil_init != VK_ATTACHMENT_LOAD_OP_CLEAR) + job->ds.load.s = true; + } + + if (pvr_ds_attachment_requires_zls(&job->ds) || + store_was_optimised_out) { job->process_empty_tiles = true; + } if (pvr_sub_cmd_gfx_requires_ds_subtile_alignment(dev_info, job)) { result = pvr_sub_cmd_gfx_align_ds_subtiles(cmd_buffer, sub_cmd); diff --git a/src/imagination/vulkan/pvr_csb_enum_helpers.h b/src/imagination/vulkan/pvr_csb_enum_helpers.h index 55c0dac1bae..893dcbee6d3 100644 --- a/src/imagination/vulkan/pvr_csb_enum_helpers.h +++ b/src/imagination/vulkan/pvr_csb_enum_helpers.h @@ -70,6 +70,44 @@ pvr_cr_isp_aa_mode_type(uint32_t samples) } } +/* clang-format off */ +static inline bool +pvr_zls_format_type_is_packed(enum PVRX(CR_ZLS_FORMAT_TYPE) type) +/* clang-format on */ +{ + switch (type) { + case PVRX(CR_ZLS_FORMAT_TYPE_24BITINT): + case PVRX(CR_ZLS_FORMAT_TYPE_F64Z): + return true; + + case PVRX(CR_ZLS_FORMAT_TYPE_F32Z): + case PVRX(CR_ZLS_FORMAT_TYPE_16BITINT): + return false; + + default: + unreachable("Invalid ZLS format type"); + } +} + +/* clang-format off */ +static inline bool +pvr_zls_format_type_is_int(enum PVRX(CR_ZLS_FORMAT_TYPE) type) +/* clang-format on */ +{ + switch (type) { + case PVRX(CR_ZLS_FORMAT_TYPE_24BITINT): + case PVRX(CR_ZLS_FORMAT_TYPE_16BITINT): + return true; + + case PVRX(CR_ZLS_FORMAT_TYPE_F32Z): + case PVRX(CR_ZLS_FORMAT_TYPE_F64Z): + return false; + + default: + unreachable("Invalid ZLS format type"); + } +} + /****************************************************************************** PDS ******************************************************************************/ diff --git a/src/imagination/vulkan/pvr_job_render.c b/src/imagination/vulkan/pvr_job_render.c index 7f8d1697f94..9e661ccf3ba 100644 --- a/src/imagination/vulkan/pvr_job_render.c +++ b/src/imagination/vulkan/pvr_job_render.c @@ -1064,8 +1064,8 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, const enum PVRX(CR_ISP_AA_MODE_TYPE) isp_aa_mode = pvr_cr_isp_aa_mode_type(job->samples); + enum PVRX(CR_ZLS_FORMAT_TYPE) zload_format = PVRX(CR_ZLS_FORMAT_TYPE_F32Z); uint32_t *stream_ptr = (uint32_t *)state->fw_stream; - enum PVRX(CR_ZLOADFORMAT_TYPE) zload_format; uint32_t pixel_ctl; uint32_t isp_ctl; @@ -1101,7 +1101,7 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE); pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLSCTL, value) { - if (job->has_depth_attachment) { + if (job->has_depth_attachment || job->has_stencil_attachment) { uint32_t alignment_x; uint32_t alignment_y; @@ -1129,44 +1129,24 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, value.storetwiddled = true; } - switch (job->ds.vk_format) { - case VK_FORMAT_D16_UNORM: - value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_16BITINT); - value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_16BITINT); - break; - - case VK_FORMAT_D32_SFLOAT: - value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_F32Z); - value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z); - break; - - case VK_FORMAT_D24_UNORM_S8_UINT: - value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_24BITINT); - value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_24BITINT); - break; - - default: - unreachable("Unsupported depth format"); - } - - value.zloaden = job->ds.load; - value.forcezload = value.zloaden; - - value.zstoreen = job->ds.store; - value.forcezstore = value.zstoreen; + value.zloadformat = job->ds.zls_format; + value.zstoreformat = job->ds.zls_format; zload_format = value.zloadformat; - } else { - zload_format = PVRX(CR_ZLOADFORMAT_TYPE_F32Z); } - if (job->has_stencil_attachment) { - value.sstoreen = job->ds.store; - value.forcezstore = value.sstoreen; + if (job->has_depth_attachment) { + value.zloaden = job->ds.load.d; + value.zstoreen = job->ds.store.d; + } - value.sloaden = job->ds.load; - value.forcezload = value.sloaden; + if (job->has_stencil_attachment) { + value.sloaden = job->ds.load.s; + value.sstoreen = job->ds.store.s; } + + value.forcezload = value.zloaden || value.sloaden; + value.forcezstore = value.zstoreen || value.sstoreen; } stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL); @@ -1184,7 +1164,7 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, * in CR_ISP_STENCIL_LOAD_BASE does not contain a depth component. */ assert(job->has_depth_attachment || - job->ds.vk_format == VK_FORMAT_S8_UINT); + !pvr_zls_format_type_is_packed(job->ds.zls_format)); value.enable = !job->has_depth_attachment; } } @@ -1236,15 +1216,15 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, * - job->depth_clear_value is set to a sensible default in that case. */ switch (zload_format) { - case PVRX(CR_ZLOADFORMAT_TYPE_F32Z): + case PVRX(CR_ZLS_FORMAT_TYPE_F32Z): value.value = fui(depth_clear); break; - case PVRX(CR_ZLOADFORMAT_TYPE_16BITINT): + case PVRX(CR_ZLS_FORMAT_TYPE_16BITINT): value.value = _mesa_float_to_unorm(depth_clear, 16); break; - case PVRX(CR_ZLOADFORMAT_TYPE_24BITINT): + case PVRX(CR_ZLS_FORMAT_TYPE_24BITINT): value.value = _mesa_float_to_unorm(depth_clear, 24); break; @@ -1277,8 +1257,7 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, * bias factor of 1.0 equates to 1 ULP of increase to the depth value. */ value.dbias_is_int = PVR_HAS_ERN(dev_info, 42307) && - (job->ds.vk_format == VK_FORMAT_D16_UNORM || - job->ds.vk_format == VK_FORMAT_D24_UNORM_S8_UINT); + pvr_zls_format_type_is_int(job->ds.zls_format); } /* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be * possible to fully pack CR_ISP_CTL above rather than having to OR in part diff --git a/src/imagination/vulkan/pvr_job_render.h b/src/imagination/vulkan/pvr_job_render.h index a214d5e5239..b7c25b10d3a 100644 --- a/src/imagination/vulkan/pvr_job_render.h +++ b/src/imagination/vulkan/pvr_job_render.h @@ -97,15 +97,17 @@ struct pvr_render_job { * has_stencil_attachment are false, the contents are undefined. */ struct pvr_ds_attachment { - bool load; - bool store; + struct { + bool d : 1; + bool s : 1; + } load, store; pvr_dev_addr_t addr; uint32_t stride; uint32_t height; VkExtent2D physical_extent; uint32_t layer_size; - VkFormat vk_format; + enum PVRX(CR_ZLS_FORMAT_TYPE) zls_format; /* FIXME: This should be of type 'enum pvr_memlayout', but this is defined * in pvr_private.h, which causes a circular include dependency. For now, * treat it as a uint32_t. A couple of ways to possibly fix this:
