From: Jason Ekstrand <jason.ekstr...@intel.com>

Nanley Chery:
(rebase)
 - Resolve conflicts with the new anv_batch_emit macro
(amend)
 - Update commit title
 - Combine all HZ operations into one function
 - Add code for performing HiZ resolve operations
 - Add proper stencil and multisampling support
 - Set the proper clear rectangles
 - Add required cases for aborting an HZ operation
Signed-off-by: Nanley Chery <nanley.g.ch...@intel.com> --- src/intel/vulkan/anv_genX.h | 3 + src/intel/vulkan/anv_private.h | 6 ++ src/intel/vulkan/gen7_cmd_buffer.c | 5 ++ src/intel/vulkan/gen8_cmd_buffer.c | 124 +++++++++++++++++++++++++++++++++++++ 4 files changed, 138 insertions(+) diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index cf5a232..16de990 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -54,6 +54,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer); +void genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, + enum anv_hz_op op); + VkResult genX(graphics_pipeline_create)(VkDevice _device, struct anv_pipeline_cache *cache, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 5718a19..40325fd 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1401,6 +1401,12 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); +enum anv_hz_op { /* operation selector passed as the op argument of genX(cmd_buffer_do_hz_op) */ + ANV_HZ_OP_CLEAR, + ANV_HZ_OP_HIZ_RESOLVE, + ANV_HZ_OP_DEPTH_RESOLVE, +}; + struct anv_fence { struct anv_bo bo; struct drm_i915_gem_execbuffer2 execbuf; diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 61778aa..a057a04 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -323,6 +323,11 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty = 0; } +void +genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum anv_hz_op op) +{ /* empty body: this gen7 version emits nothing for any op */ +} + void genX(CmdSetEvent)( VkCommandBuffer commandBuffer, VkEvent event, diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index e22b4e2..4f27350 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ 
b/src/intel/vulkan/gen8_cmd_buffer.c @@ -399,6 +399,130 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); } + +/** + * Emit the HZ_OP packet in the sequence specified by the BDW PRM section + * entitled: "Optimized Depth Buffer Clear and/or Stencil Buffer Clear." + * + * The sequence emitted is: a 3DSTATE_WM_HZ_OP programmed for the requested + * op, a PIPE_CONTROL with a WriteImmediateData post-sync operation whose + * address is the device workaround BO, and a second 3DSTATE_WM_HZ_OP on + * which this function sets no fields. + * + * Returns without emitting anything when + * anv_cmd_buffer_get_depth_stencil_view() returns NULL, when + * anv_image_has_hiz() is false for the view image, or when the attachment + * load/store op does not match the operation: CLEAR requires LOAD_OP_CLEAR, + * HIZ_RESOLVE requires LOAD_OP_LOAD and DEPTH_RESOLVE requires + * STORE_OP_STORE. A CLEAR whose render area extent differs from the view + * extent (sample count below 16) is also skipped if the render area offset + * or extent is not a multiple of the computed alignment. + * + * A CLEAR that is emitted resets the attachment pending_clear_aspects to 0. + */ +void +genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum anv_hz_op op) +{ + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + + if (iview == NULL || !anv_image_has_hiz(iview->image)) + return; + + const uint32_t ds = cmd_state->subpass->depth_stencil_attachment; + const bool full_surface_op = + cmd_state->render_area.extent.width == iview->extent.width && + cmd_state->render_area.extent.height == iview->extent.height; + + /* Validate that we can perform the HZ operation and that it's necessary. */ + switch (op) { + case ANV_HZ_OP_CLEAR: + if (cmd_buffer->state.pass->attachments[ds].load_op != + VK_ATTACHMENT_LOAD_OP_CLEAR) + return; + + /* Apply alignment restrictions. For a sample count of 16, the formulas + * reduce to identity and indicate that no alignment is required. 
+ */ + if (!full_surface_op && iview->image->samples < 16) { + uint32_t align_w = 1; + uint32_t align_h = 1; + + if (iview->image->samples > 1) { + isl_msaa_interleaved_scale_px_to_sa(iview->image->samples, + &align_w, &align_h); + } + + align_w = 8 / align_w; + align_h = 4 / align_h; + + if (cmd_state->render_area.offset.x % align_w || + cmd_state->render_area.offset.y % align_h || + cmd_state->render_area.extent.width % align_w || + cmd_state->render_area.extent.height % align_h) + return; + } + break; + case ANV_HZ_OP_DEPTH_RESOLVE: + if (cmd_buffer->state.pass->attachments[ds].store_op != + VK_ATTACHMENT_STORE_OP_STORE) + return; + break; + case ANV_HZ_OP_HIZ_RESOLVE: + if (cmd_buffer->state.pass->attachments[ds].load_op != + VK_ATTACHMENT_LOAD_OP_LOAD) + return; + break; + } + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp) { + switch (op) { + case ANV_HZ_OP_CLEAR: + hzp.StencilBufferClearEnable = VK_IMAGE_ASPECT_STENCIL_BIT & + cmd_state->attachments[ds].pending_clear_aspects; + hzp.DepthBufferClearEnable = VK_IMAGE_ASPECT_DEPTH_BIT & + cmd_state->attachments[ds].pending_clear_aspects; + hzp.FullSurfaceDepthandStencilClear = full_surface_op; + hzp.StencilClearValue = 0xff & + cmd_state->attachments[ds].clear_value.depthStencil.stencil; + + /* Mark aspects as cleared */ + cmd_state->attachments[ds].pending_clear_aspects = 0; + break; + case ANV_HZ_OP_DEPTH_RESOLVE: + hzp.DepthBufferResolveEnable = true; + break; + case ANV_HZ_OP_HIZ_RESOLVE: + hzp.HierarchicalDepthBufferResolveEnable = true; + break; + } + + /* The depth resolve rectangle must match the size of the previous clear + * rectangle. + * + * The HiZ resolve rectangle is specified as needing to be the + * size of the full RT and aligned to 8x4; these requirements are in + * conflict if the RT extent is not a multiple of 8x4. Testing shows + * that setting the rectangle to match the render area works just fine. 
+ * + * In a manner similar to i965, we'd like to diverge from the PRMs here + * to reduce the number of HiZ blocks written to. + */ + hzp.ClearRectangleXMin = anv_minify(cmd_state->render_area.offset.x, + iview->base_mip); + hzp.ClearRectangleYMin = anv_minify(cmd_state->render_area.offset.y, + iview->base_mip); + hzp.ClearRectangleXMax = anv_minify(cmd_state->render_area.offset.x + + cmd_state->render_area.extent.width, + iview->base_mip); + hzp.ClearRectangleYMax = anv_minify(cmd_state->render_area.offset.y + + cmd_state->render_area.extent.height, + iview->base_mip); + + /* Due to a hardware issue, this bit must be zero (MBZ). */ + hzp.ScissorRectangleEnable = false; + hzp.NumberofMultisamples = ffs(iview->image->samples) - 1; + hzp.SampleMask = 0xFFFF; + } + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.PostSyncOperation = WriteImmediateData; + pc.Address = + (struct anv_address){ &cmd_buffer->device->workaround_bo, 0 }; + } + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp); + + /* TODO: Determine if a DepthFlush and DepthStall is really necessary for + * non-full_surface_op clears. + */ +} + void genX(CmdSetEvent)( VkCommandBuffer commandBuffer, VkEvent _event, -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev