From: Jason Ekstrand <jason.ekstr...@intel.com>

Nanley Chery:
(rebase)
 - Resolve conflicts with the new anv_batch_emit macro
(amend)
 - Update commit title
 - Combine all HZ operations into one function
 - Add code for performing HiZ resolve operations
 - Add proper stencil and multisampling support
 - Set the proper clear rectangles
 - Add required cases for aborting an HZ operation

Signed-off-by: Nanley Chery <nanley.g.ch...@intel.com>
---
 src/intel/vulkan/anv_genX.h        |   3 +
 src/intel/vulkan/anv_private.h     |   6 ++
 src/intel/vulkan/gen7_cmd_buffer.c |   5 ++
 src/intel/vulkan/gen8_cmd_buffer.c | 124 +++++++++++++++++++++++++++++++++++++
 4 files changed, 138 insertions(+)

diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index cf5a232..16de990 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -54,6 +54,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct 
anv_cmd_buffer *cmd_buffer);
 
 void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer);
 
+void genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer,
+                               enum anv_hz_op op);
+
 VkResult
 genX(graphics_pipeline_create)(VkDevice _device,
                                struct anv_pipeline_cache *cache,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 5718a19..40325fd 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1401,6 +1401,12 @@ anv_cmd_buffer_get_depth_stencil_view(const struct 
anv_cmd_buffer *cmd_buffer);
 
 void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
 
+enum anv_hz_op { /* op for genX(cmd_buffer_do_hz_op); gen8 HZ_OP field set: */
+   ANV_HZ_OP_CLEAR,          /* Depth/StencilBufferClearEnable */
+   ANV_HZ_OP_HIZ_RESOLVE,    /* HierarchicalDepthBufferResolveEnable */
+   ANV_HZ_OP_DEPTH_RESOLVE,  /* DepthBufferResolveEnable */
+};
+
 struct anv_fence {
    struct anv_bo bo;
    struct drm_i915_gem_execbuffer2 execbuf;
diff --git a/src/intel/vulkan/gen7_cmd_buffer.c 
b/src/intel/vulkan/gen7_cmd_buffer.c
index 61778aa..a057a04 100644
--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -323,6 +323,11 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer 
*cmd_buffer)
    cmd_buffer->state.dirty = 0;
 }
 
+void
+genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum anv_hz_op op)
+{ /* Empty stub: this gen7 file emits nothing for any anv_hz_op value. */
+}
+
 void genX(CmdSetEvent)(
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     event,
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c 
b/src/intel/vulkan/gen8_cmd_buffer.c
index e22b4e2..4f27350 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -399,6 +399,130 @@ genX(cmd_buffer_flush_compute_state)(struct 
anv_cmd_buffer *cmd_buffer)
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 }
 
+
+/**
+ * Emit 3DSTATE_WM_HZ_OP for \p op, a PIPE_CONTROL, then a blank HZ_OP, per the
+ * BDW PRM section "Optimized Depth Buffer Clear and/or Stencil Buffer Clear."
+ */
+void
+genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum anv_hz_op op)
+{
+   struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+   const struct anv_image_view *iview =
+      anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+
+   if (iview == NULL || !anv_image_has_hiz(iview->image))
+      return;
+
+   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
+   const bool full_surface_op =
+             cmd_state->render_area.extent.width == iview->extent.width &&
+             cmd_state->render_area.extent.height == iview->extent.height;
+
+   /* Per-op gate: return without emitting if the op is unneeded or invalid. */
+   switch (op) {
+   case ANV_HZ_OP_CLEAR:
+      if (cmd_buffer->state.pass->attachments[ds].load_op !=
+          VK_ATTACHMENT_LOAD_OP_CLEAR)
+         return;
+
+      /* A partial clear needs its offset and extent aligned to 8x4 divided by
+       * the MSAA px-to-sa scale, else we bail. For a sample count of 16 the
+       * formulas reduce to identity, so no alignment is required. */
+      if (!full_surface_op && iview->image->samples < 16) {
+         uint32_t align_w = 1;
+         uint32_t align_h = 1;
+
+         if (iview->image->samples > 1) {
+            isl_msaa_interleaved_scale_px_to_sa(iview->image->samples,
+                                                &align_w, &align_h);
+         }
+
+         align_w = 8 / align_w;
+         align_h = 4 / align_h;
+
+         if (cmd_state->render_area.offset.x % align_w ||
+             cmd_state->render_area.offset.y % align_h ||
+             cmd_state->render_area.extent.width % align_w ||
+             cmd_state->render_area.extent.height % align_h)
+            return;
+      }
+      break;
+   case ANV_HZ_OP_DEPTH_RESOLVE:
+      if (cmd_buffer->state.pass->attachments[ds].store_op !=
+          VK_ATTACHMENT_STORE_OP_STORE)
+         return;
+      break;
+   case ANV_HZ_OP_HIZ_RESOLVE:
+      if (cmd_buffer->state.pass->attachments[ds].load_op !=
+          VK_ATTACHMENT_LOAD_OP_LOAD)
+         return;
+      break;
+   }
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp) {
+      switch (op) {
+      case ANV_HZ_OP_CLEAR:
+         hzp.StencilBufferClearEnable = VK_IMAGE_ASPECT_STENCIL_BIT &
+                            cmd_state->attachments[ds].pending_clear_aspects;
+         hzp.DepthBufferClearEnable = VK_IMAGE_ASPECT_DEPTH_BIT &
+                            cmd_state->attachments[ds].pending_clear_aspects;
+         hzp.FullSurfaceDepthandStencilClear = full_surface_op;
+         hzp.StencilClearValue = 0xff &
+                   cmd_state->attachments[ds].clear_value.depthStencil.stencil;
+
+         /* Mark all aspects of this attachment as no longer pending a clear */
+         cmd_state->attachments[ds].pending_clear_aspects = 0;
+         break;
+      case ANV_HZ_OP_DEPTH_RESOLVE:
+         hzp.DepthBufferResolveEnable = true;
+         break;
+      case ANV_HZ_OP_HIZ_RESOLVE:
+         hzp.HierarchicalDepthBufferResolveEnable = true;
+         break;
+      }
+
+      /* The depth resolve rectangle must match the size of the previous clear
+       * rectangle.
+       *
+       * The HiZ resolve rectangle is specified as needing to be the size of
+       * the full RT and aligned to 8x4; these requirements conflict if the RT
+       * extent is not a multiple of 8x4. Testing shows that matching the
+       * render area works fine and, as in i965, we diverge from the PRMs here
+       * to reduce the number of HiZ blocks written to.
+       *
+       * NOTE(review): full_surface_op compares the unminified render area to
+       * iview->extent, yet the rectangle is minified by base_mip -- confirm. */
+      hzp.ClearRectangleXMin = anv_minify(cmd_state->render_area.offset.x,
+                                          iview->base_mip);
+      hzp.ClearRectangleYMin = anv_minify(cmd_state->render_area.offset.y,
+                                          iview->base_mip);
+      hzp.ClearRectangleXMax = anv_minify(cmd_state->render_area.offset.x +
+                                          cmd_state->render_area.extent.width,
+                                          iview->base_mip);
+      hzp.ClearRectangleYMax = anv_minify(cmd_state->render_area.offset.y +
+                                          cmd_state->render_area.extent.height,
+                                          iview->base_mip);
+
+      /* Due to a hardware issue, this bit must be zero (MBZ). */
+      hzp.ScissorRectangleEnable = false;
+      hzp.NumberofMultisamples = ffs(iview->image->samples) - 1;
+      hzp.SampleMask = 0xFFFF;
+   }
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+      pc.PostSyncOperation = WriteImmediateData;
+      pc.Address =
+         (struct anv_address){ &cmd_buffer->device->workaround_bo, 0 };
+   }
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp);
+
+   /* TODO: Determine if a DepthFlush and DepthStall are really necessary for
+    *       non-full_surface_op clears.
+    */
+}
+
 void genX(CmdSetEvent)(
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     _event,
-- 
2.9.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to