After investigating on this, it appears that COND_WRITE doesn't
work correctly in some situations. I don't know exactly why does
it fail to update DB_Z_INFO.ZRANGE_PRECISION, but as AMDVLK
also uses COND_EXEC I think there is a reason.

Now the driver stores a new metadata value in order to reflect
the last fast depth clear state. If a TC-compat HTILE is fast cleared
with 0.0f, we have to update ZRANGE_PRECISION to 0 in order to
work around that hardware bug.

This fixes rendering issues with The Forest and DXVK and doesn't
seem to introduce any regressions.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108914
Fixes: 68dead112e7 ("radv: update the ZRANGE_PRECISION value for the TC-compat 
bug")
Signed-off-by: Samuel Pitoiset <[email protected]>
---
 src/amd/vulkan/radv_cmd_buffer.c | 91 ++++++++++++++++++++++----------
 src/amd/vulkan/radv_image.c      | 10 +++-
 src/amd/vulkan/radv_private.h    |  2 +
 3 files changed, 75 insertions(+), 28 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index ccf762ead46..23909a0f7dd 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1067,7 +1067,7 @@ static void
 radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
                             struct radv_ds_buffer_info *ds,
                             struct radv_image *image, VkImageLayout layout,
-                            bool requires_cond_write)
+                            bool requires_cond_exec)
 {
        uint32_t db_z_info = ds->db_z_info;
        uint32_t db_z_info_reg;
@@ -1091,38 +1091,21 @@ radv_update_zrange_precision(struct radv_cmd_buffer 
*cmd_buffer,
        }
 
        /* When we don't know the last fast clear value we need to emit a
-        * conditional packet, otherwise we can update DB_Z_INFO directly.
+        * conditional packet that will eventually skip the following
+        * SET_CONTEXT_REG packet.
         */
-       if (requires_cond_write) {
-               radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0));
-
-               const uint32_t write_space = 0 << 8;    /* register */
-               const uint32_t poll_space = 1 << 4;     /* memory */
-               const uint32_t function = 3 << 0;       /* equal to the 
reference */
-               const uint32_t options = write_space | poll_space | function;
-               radeon_emit(cmd_buffer->cs, options);
-
-               /* poll address - location of the depth clear value */
+       if (requires_cond_exec) {
                uint64_t va = radv_buffer_get_va(image->bo);
-               va += image->offset + image->clear_value_offset;
-
-               /* In presence of stencil format, we have to adjust the base
-                * address because the first value is the stencil clear value.
-                */
-               if (vk_format_is_stencil(image->vk_format))
-                       va += 4;
+               va += image->offset + image->tc_compat_zrange_offset;
 
+               radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
                radeon_emit(cmd_buffer->cs, va);
                radeon_emit(cmd_buffer->cs, va >> 32);
-
-               radeon_emit(cmd_buffer->cs, fui(0.0f));          /* reference 
value */
-               radeon_emit(cmd_buffer->cs, (uint32_t)-1);       /* comparison 
mask */
-               radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write 
address low */
-               radeon_emit(cmd_buffer->cs, 0u);                 /* write 
address high */
-               radeon_emit(cmd_buffer->cs, db_z_info);
-       } else {
-               radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, 
db_z_info);
+               radeon_emit(cmd_buffer->cs, 0);
+               radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
        }
+
+       radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
 }
 
 static void
@@ -1269,6 +1252,45 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer 
*cmd_buffer,
                radeon_emit(cs, fui(ds_clear_value.depth));
 }
 
+/**
+ * Update the TC-compat metadata value for this image.
+ */
+static void
+radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
+                                  struct radv_image *image,
+                                  uint32_t value)
+{
+       struct radeon_cmdbuf *cs = cmd_buffer->cs;
+       uint64_t va = radv_buffer_get_va(image->bo);
+       va += image->offset + image->tc_compat_zrange_offset;
+
+       radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+       radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+                       S_370_WR_CONFIRM(1) |
+                       S_370_ENGINE_SEL(V_370_PFP));
+       radeon_emit(cs, va);
+       radeon_emit(cs, va >> 32);
+       radeon_emit(cs, value);
+}
+
+static void
+radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
+                                     struct radv_image *image,
+                                     VkClearDepthStencilValue ds_clear_value)
+{
+       struct radeon_cmdbuf *cs = cmd_buffer->cs;
+       uint64_t va = radv_buffer_get_va(image->bo);
+       va += image->offset + image->tc_compat_zrange_offset;
+       uint32_t cond_val;
+
+       /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
+        * depth clear value is 0.0f.
+        */
+       cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
+
+       radv_set_tc_compat_zrange_metadata(cmd_buffer, image, cond_val);
+}
+
 /**
  * Update the clear depth/stencil values for this image.
  */
@@ -1282,6 +1304,12 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer 
*cmd_buffer,
 
        radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects);
 
+       if (radv_image_is_tc_compat_htile(image) &&
+           (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
+               radv_update_tc_compat_zrange_metadata(cmd_buffer, image,
+                                                     ds_clear_value);
+       }
+
        radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value,
                                        aspects);
 }
@@ -4229,6 +4257,15 @@ static void radv_initialize_htile(struct radv_cmd_buffer 
*cmd_buffer,
                aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
 
        radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects);
+
+       if (radv_image_is_tc_compat_htile(image)) {
+               /* Initialize the TC-compat metada value to 0 because by
+                * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only
+                * need have to conditionally update its value when performing
+                * a fast depth clear.
+                */
+               radv_set_tc_compat_zrange_metadata(cmd_buffer, image, 0);
+       }
 }
 
 static void radv_handle_depth_image_transition(struct radv_cmd_buffer 
*cmd_buffer,
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index f447166d80c..9fcbaa3184c 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -870,6 +870,14 @@ radv_image_alloc_htile(struct radv_image *image)
        /* + 8 for storing the clear values */
        image->clear_value_offset = image->htile_offset + 
image->surface.htile_size;
        image->size = image->clear_value_offset + 8;
+       if (radv_image_is_tc_compat_htile(image)) {
+               /* Metadata for the TC-compatible HTILE hardware bug which
+                * have to be fixed by updating ZRANGE_PRECISION when doing
+                * fast depth clears to 0.0f.
+                */
+               image->tc_compat_zrange_offset = image->clear_value_offset + 8;
+               image->size = image->clear_value_offset + 16;
+       }
        image->alignment = align64(image->alignment, 
image->surface.htile_alignment);
 }
 
@@ -1014,8 +1022,8 @@ radv_image_create(VkDevice _device,
                        /* Otherwise, try to enable HTILE for depth surfaces. */
                        if (radv_image_can_enable_htile(image) &&
                            !(device->instance->debug_flags & 
RADV_DEBUG_NO_HIZ)) {
-                               radv_image_alloc_htile(image);
                                image->tc_compatible_htile = 
image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
+                               radv_image_alloc_htile(image);
                        } else {
                                image->surface.htile_size = 0;
                        }
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index ac756f2c247..1ef5b215db7 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1503,6 +1503,8 @@ struct radv_image {
        uint64_t clear_value_offset;
        uint64_t fce_pred_offset;
 
+       uint64_t tc_compat_zrange_offset;
+
        /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
        VkDeviceMemory owned_memory;
 };
-- 
2.19.2

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to