On Thu, Feb 08, 2018 at 05:23:21PM -0800, Jason Ekstrand wrote: > On Thu, Feb 8, 2018 at 5:20 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > > > On Fri, Jan 12, 2018 at 2:45 PM, Nanley Chery <nanleych...@gmail.com> > > wrote: > > > >> On Mon, Nov 27, 2017 at 07:06:09PM -0800, Jason Ekstrand wrote: > >> > --- > >> > src/intel/vulkan/anv_blorp.c | 243 ++++++++++++++++-------------- > >> ------- > >> > src/intel/vulkan/anv_private.h | 17 ++- > >> > src/intel/vulkan/genX_cmd_buffer.c | 68 ++++++++++- > >> > 3 files changed, 188 insertions(+), 140 deletions(-) > >> > > >> > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c > >> > index 7401234..45d7b12 100644 > >> > --- a/src/intel/vulkan/anv_blorp.c > >> > +++ b/src/intel/vulkan/anv_blorp.c > >> > @@ -1132,143 +1132,6 @@ enum subpass_stage { > >> > SUBPASS_STAGE_RESOLVE, > >> > }; > >> > > >> > -static bool > >> > -subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) > >> > -{ > >> > - const struct anv_cmd_state *cmd_state = &cmd_buffer->state; > >> > - uint32_t ds = cmd_state->subpass->depth_sten > >> cil_attachment.attachment; > >> > - > >> > - if (ds != VK_ATTACHMENT_UNUSED) { > >> > - assert(ds < cmd_state->pass->attachment_count); > >> > - if (cmd_state->attachments[ds].pending_clear_aspects) > >> > - return true; > >> > - } > >> > - > >> > - return false; > >> > -} > >> > - > >> > -void > >> > -anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) > >> > -{ > >> > - const struct anv_cmd_state *cmd_state = &cmd_buffer->state; > >> > - const VkRect2D render_area = cmd_buffer->state.render_area; > >> > - > >> > - > >> > - if (!subpass_needs_clear(cmd_buffer)) > >> > - return; > >> > - > >> > - /* Because this gets called within a render pass, we tell blorp not > >> to > >> > - * trash our depth and stencil buffers. > >> > - */ > >> > - struct blorp_batch batch; > >> > - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, > >> > - BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); > >> > - > >> > - VkClearRect clear_rect = { > >> > - .rect = cmd_buffer->state.render_area, > >> > - .baseArrayLayer = 0, > >> > - .layerCount = cmd_buffer->state.framebuffer->layers, > >> > - }; > >> > - > >> > - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; > >> > - > >> > - const uint32_t ds = cmd_state->subpass->depth_sten > >> cil_attachment.attachment; > >> > - assert(ds == VK_ATTACHMENT_UNUSED || ds < > >> cmd_state->pass->attachment_count); > >> > - > >> > - if (ds != VK_ATTACHMENT_UNUSED && > >> > - cmd_state->attachments[ds].pending_clear_aspects) { > >> > - > >> > - VkClearAttachment clear_att = { > >> > - .aspectMask = cmd_state->attachments[ds].pen > >> ding_clear_aspects, > >> > - .clearValue = cmd_state->attachments[ds].clear_value, > >> > - }; > >> > - > >> > - > >> > - const uint8_t gen = cmd_buffer->device->info.gen; > >> > - bool clear_with_hiz = gen >= 8 && > >> > cmd_state->attachments[ds].aux_usage > >> == > >> > - ISL_AUX_USAGE_HIZ; > >> > - const struct anv_image_view *iview = fb->attachments[ds]; > >> > - > >> > - if (clear_with_hiz) { > >> > - const bool clear_depth = clear_att.aspectMask & > >> > - VK_IMAGE_ASPECT_DEPTH_BIT; > >> > - const bool clear_stencil = clear_att.aspectMask & > >> > - VK_IMAGE_ASPECT_STENCIL_BIT; > >> > - > >> > - /* Check against restrictions for depth buffer clearing. A > >> great GPU > >> > - * performance benefit isn't expected when using the HZ > >> sequence for > >> > - * stencil-only clears. Therefore, we don't emit a HZ op > >> sequence for > >> > - * a stencil clear in addition to using the BLORP-fallback > >> for depth. > >> > - */ > >> > - if (clear_depth) { > >> > - if (!blorp_can_hiz_clear_depth(gen, > >> iview->planes[0].isl.format, > >> > - iview->image->samples, > >> > - render_area.offset.x, > >> > - render_area.offset.y, > >> > - render_area.offset.x + > >> > - render_area.extent.width, > >> > - render_area.offset.y + > >> > - render_area.extent.height)) > >> { > >> > - clear_with_hiz = false; > >> > - } else if (clear_att.clearValue.depthStencil.depth != > >> > - ANV_HZ_FC_VAL) { > >> > - /* Don't enable fast depth clears for any color not > >> equal to > >> > - * ANV_HZ_FC_VAL. > >> > - */ > >> > - clear_with_hiz = false; > >> > - } else if (gen == 8 && > >> > - anv_can_sample_with_hiz(&cmd_ > >> buffer->device->info, > >> > - iview->image)) { > >> > - /* Only gen9+ supports returning ANV_HZ_FC_VAL when > >> sampling a > >> > - * fast-cleared portion of a HiZ buffer. Testing has > >> revealed > >> > - * that Gen8 only supports returning 0.0f. Gens prior > >> to gen8 do > >> > - * not support this feature at all. > >> > - */ > >> > - clear_with_hiz = false; > >> > - } > >> > - } > >> > - > >> > - if (clear_with_hiz) { > >> > - blorp_gen8_hiz_clear_attachments(&batch, > >> iview->image->samples, > >> > - render_area.offset.x, > >> > - render_area.offset.y, > >> > - render_area.offset.x + > >> > - render_area.extent.width, > >> > - render_area.offset.y + > >> > - render_area.extent.height, > >> > - clear_depth, > >> clear_stencil, > >> > - clear_att.clearValue. > >> > - depthStencil.stencil); > >> > - > >> > - /* From the SKL PRM, Depth Buffer Clear: > >> > - * > >> > - * Depth Buffer Clear Workaround > >> > - * Depth buffer clear pass using any of the methods > >> (WM_STATE, > >> > - * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a > >> > - * PIPE_CONTROL command with DEPTH_STALL bit and Depth > >> FLUSH bits > >> > - * “set” before starting to render. DepthStall and > >> DepthFlush are > >> > - * not needed between consecutive depth clear passes nor > >> is it > >> > - * required if the depth-clear pass was done with > >> “full_surf_clear” > >> > - * bit set in the 3DSTATE_WM_HZ_OP. > >> > - */ > >> > - if (clear_depth) { > >> > - cmd_buffer->state.pending_pipe_bits |= > >> > - ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | > >> ANV_PIPE_DEPTH_STALL_BIT; > >> > - } > >> > - } > >> > - } > >> > - > >> > - if (!clear_with_hiz) { > >> > - clear_depth_stencil_attachment(cmd_buffer, &batch, > >> > - &clear_att, 1, &clear_rect); > >> > - } > >> > - > >> > - cmd_state->attachments[ds].pending_clear_aspects = 0; > >> > - } > >> > - > >> > - blorp_batch_finish(&batch); > >> > -} > >> > - > >> > static void > >> > resolve_surface(struct blorp_batch *batch, > >> > struct blorp_surf *src_surf, > >> > @@ -1568,6 +1431,53 @@ anv_image_clear_color(struct anv_cmd_buffer > >> *cmd_buffer, > >> > } > >> > > >> > void > >> > +anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer, > >> > + const struct anv_image *image, > >> > + VkImageAspectFlags aspects, > >> > + enum isl_aux_usage depth_aux_usage, > >> > + uint32_t level, > >> > + uint32_t base_layer, uint32_t > >> layer_count, > >> > + VkRect2D area, > >> > + float depth_value, uint8_t stencil_value) > >> > +{ > >> > + assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | > >> > + VK_IMAGE_ASPECT_STENCIL_BIT)); > >> > + > >> > + struct blorp_batch batch; > >> > + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, > >> 0); > >> > + > >> > + struct blorp_surf depth, stencil; > >> > + if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { > >> > >> Why use image->aspects instead of aspects? > >>
Ping. Either way, this should work. > >> > + get_blorp_surf_for_anv_image(cmd_buffer->device, > >> > + image, VK_IMAGE_ASPECT_DEPTH_BIT, > >> > + depth_aux_usage, &depth); > >> > + depth.clear_color.f32[0] = ANV_HZ_FC_VAL; > >> > + } else { > >> > + memset(&stencil, 0, sizeof(stencil)); > >> > >> The depth blorp_surf should be memset here. > >> > > > > Fixed. See reply to Topi > > > > > >> > + } > >> > + > >> > + if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { > >> > + get_blorp_surf_for_anv_image(cmd_buffer->device, > >> > + image, VK_IMAGE_ASPECT_STENCIL_BIT, > >> > + ISL_AUX_USAGE_NONE, &stencil); > >> > + } else { > >> > + memset(&stencil, 0, sizeof(stencil)); > >> > + } > >> > + > >> > + blorp_clear_depth_stencil(&batch, &depth, &stencil, > >> > + level, base_layer, layer_count, > >> > + area.offset.x, area.offset.y, > >> > + area.offset.x + area.extent.width, > >> > + area.offset.y + area.extent.height, > >> > + aspects & VK_IMAGE_ASPECT_DEPTH_BIT, > >> > + depth_value, > >> > + (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? > >> 0xff : 0, > >> > + stencil_value); > >> > + > >> > + blorp_batch_finish(&batch); > >> > +} > >> > + > >> > +void > >> > anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer, > >> > const struct anv_image *image, > >> > VkImageAspectFlagBits aspect, uint32_t level, > >> > @@ -1595,6 +1505,65 @@ anv_image_hiz_op(struct anv_cmd_buffer > >> *cmd_buffer, > >> > } > >> > > >> > void > >> > +anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, > >> > + const struct anv_image *image, > >> > + VkImageAspectFlags aspects, > >> > + uint32_t level, > >> > + uint32_t base_layer, uint32_t layer_count, > >> > + VkRect2D area, uint8_t stencil_value) > >> > +{ > >> > + assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | > >> > + VK_IMAGE_ASPECT_STENCIL_BIT)); > >> > + assert(base_layer + layer_count <= > >> > + anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, 0)); > >> > + > >> > + struct blorp_batch batch; > >> > + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, > >> 0); > >> > + > >> > + struct blorp_surf depth; > >> > + get_blorp_surf_for_anv_image(cmd_buffer->device, > >> > + image, VK_IMAGE_ASPECT_DEPTH_BIT, > >> > + ISL_AUX_USAGE_HIZ, &depth); > >> > >> Shouldn't we check if aspects contains depth before calling this? > >> > > > > This is a HiZ clear. We had better have depth. I'll adjust the assert at > > the top. > > > > That's true. > >> > + depth.clear_color.f32[0] = ANV_HZ_FC_VAL; > >> > + > >> > + struct blorp_surf stencil; > >> > + if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { > >> > >> Why use image->aspects instead of aspects? > >> > > > > No good reason. I'll change to aspects. > > > > > >> > + get_blorp_surf_for_anv_image(cmd_buffer->device, > >> > + image, VK_IMAGE_ASPECT_STENCIL_BIT, > >> > + ISL_AUX_USAGE_NONE, &stencil); > >> > + } else { > >> > + memset(&stencil, 0, sizeof(stencil)); > >> > + } > >> > + > >> > + blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil, > >> > + level, base_layer, layer_count, > >> > + area.offset.x, area.offset.y, > >> > + area.offset.x + area.extent.width, > >> > + area.offset.y + area.extent.height, > >> > + aspects & VK_IMAGE_ASPECT_DEPTH_BIT, > >> > + ANV_HZ_FC_VAL, > >> > + aspects & VK_IMAGE_ASPECT_STENCIL_BIT, > >> > + stencil_value); > >> > + > >> > + blorp_batch_finish(&batch); > >> > + > >> > + /* From the SKL PRM, Depth Buffer Clear: > >> > + * > >> > + * Depth Buffer Clear Workaround > >> > + * Depth buffer clear pass using any of the methods (WM_STATE, > >> 3DSTATE_WM > >> > + * or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL command > >> with > >> > + * DEPTH_STALL bit and Depth FLUSH bits “set” before starting to > >> render. > >> > + * DepthStall and DepthFlush are not needed between consecutive > >> depth clear > >> > + * passes nor is it required if the depth-clear pass was done with > >> > + * “full_surf_clear” bit set in the 3DSTATE_WM_HZ_OP. > >> > + */ > >> > + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { > >> > + cmd_buffer->state.pending_pipe_bits |= > >> > + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT; > >> > + } > >> > +} > >> > + > >> > +void > >> > anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, > >> > const struct anv_image *image, > >> > VkImageAspectFlagBits aspect, > >> > diff --git a/src/intel/vulkan/anv_private.h > >> b/src/intel/vulkan/anv_private.h > >> > index bc355bb..b881157 100644 > >> > --- a/src/intel/vulkan/anv_private.h > >> > +++ b/src/intel/vulkan/anv_private.h > >> > @@ -1875,7 +1875,6 @@ anv_cmd_buffer_push_constants(struct > >> anv_cmd_buffer *cmd_buffer, > >> > struct anv_state > >> > anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); > >> > > >> > -void anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer); > >> > void anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer > >> *cmd_buffer); > >> > > >> > const struct anv_image_view * > >> > @@ -2545,12 +2544,28 @@ anv_image_clear_color(struct anv_cmd_buffer > >> *cmd_buffer, > >> > uint32_t level, uint32_t base_layer, uint32_t > >> layer_count, > >> > VkRect2D area, union isl_color_value > >> clear_color); > >> > void > >> > +anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer, > >> > + const struct anv_image *image, > >> > + VkImageAspectFlags aspects, > >> > + enum isl_aux_usage depth_aux_usage, > >> > + uint32_t level, > >> > + uint32_t base_layer, uint32_t > >> layer_count, > >> > + VkRect2D area, > >> > + float depth_value, uint8_t > >> stencil_value); > >> > +void > >> > anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer, > >> > const struct anv_image *image, > >> > VkImageAspectFlagBits aspect, uint32_t level, > >> > uint32_t base_layer, uint32_t layer_count, > >> > enum isl_aux_op hiz_op); > >> > void > >> > +anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, > >> > + const struct anv_image *image, > >> > + VkImageAspectFlags aspects, > >> > + uint32_t level, > >> > + uint32_t base_layer, uint32_t layer_count, > >> > + VkRect2D area, uint8_t stencil_value); > >> > +void > >> > anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, > >> > const struct anv_image *image, > >> > VkImageAspectFlagBits aspect, > >> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c > >> b/src/intel/vulkan/genX_cmd_buffer.c > >> > index 265ae44..57685bd 100644 > >> > --- a/src/intel/vulkan/genX_cmd_buffer.c > >> > +++ b/src/intel/vulkan/genX_cmd_buffer.c > >> > @@ -3216,9 +3216,73 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer > >> *cmd_buffer, > >> > att_state->pending_clear_aspects = 0; > >> > } > >> > > >> > - cmd_buffer_emit_depth_stencil(cmd_buffer); > >> > + if (subpass->depth_stencil_attachment.attachment != > >> VK_ATTACHMENT_UNUSED) { > >> > + const uint32_t a = subpass->depth_stencil_attachment.attachment; > >> > + > >> > + assert(a < cmd_state->pass->attachment_count); > >> > + struct anv_attachment_state *att_state = > >> &cmd_state->attachments[a]; > >> > + struct anv_image_view *iview = fb->attachments[a]; > >> > + const struct anv_image *image = iview->image; > >> > + > >> > + assert(image->aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | > >> > + VK_IMAGE_ASPECT_STENCIL_BIT)); > >> > + > >> > + if (att_state->pending_clear_aspects) { > >> > + bool clear_with_hiz = att_state->aux_usage == > >> ISL_AUX_USAGE_HIZ; > >> > + if (clear_with_hiz && > >> > + (att_state->pending_clear_aspects & > >> VK_IMAGE_ASPECT_DEPTH_BIT)) { > >> > + if (!blorp_can_hiz_clear_depth(GEN_GEN, > >> > + iview->planes[0].isl.format, > >> > + iview->image->samples, > >> > + render_area.offset.x, > >> > + render_area.offset.y, > >> > + render_area.offset.x + > >> > + render_area.extent.width, > >> > + render_area.offset.y + > >> > + render_area.extent.height)) > >> { > >> > >> If GEN_GEN < 8, the first assert within the above function will be > >> triggered. The original code checks the gen when assigning > >> clear_with_hiz. > >> > > > > I know I had myself convinced at one point that I could drop the GEN_GEN > > >= 8 but I don't remember now why. I'll add it back in. If nothing else, > > it'll let the compiler dead-code some stuff on HSW. > > > > Now I remember why. We don't support HiZ on gen7 at all. Lots of other > code will have to change before we can enable it so I figured I could just > let it assert for now. > Ah, makes sense. > --Jason > > > > > + clear_with_hiz = false; > >> > + } else if (att_state->clear_value.depthStencil.depth != > >> ANV_HZ_FC_VAL) { > >> > + clear_with_hiz = false; > >> > + } else if (GEN_GEN == 8 && > >> > + anv_can_sample_with_hiz(&cmd_ > >> buffer->device->info, > >> > + iview->image)) { > >> > + /* Only gen9+ supports returning ANV_HZ_FC_VAL when > >> sampling a > >> > + * fast-cleared portion of a HiZ buffer. Testing has > >> revealed > >> > + * that Gen8 only supports returning 0.0f. Gens prior > >> to gen8 > >> > + * do not support this feature at all. > >> > + */ > >> > + clear_with_hiz = false; > >> > + } > >> > + } > >> > > >> > - anv_cmd_buffer_clear_subpass(cmd_buffer); > >> > + if (clear_with_hiz) { > >> > + /* We currently only support HiZ for single-layer images */ > >> ^ > >> "and > >> single-level" ? > >> Ping. Not a big deal, though. > >> > + assert(iview->planes[0].isl.base_level == 0); > >> > + assert(iview->planes[0].isl.base_array_layer == 0); > >> > + assert(fb->layers == 1); > >> > + > >> > + anv_image_hiz_clear(cmd_buffer, image, > >> > + att_state->pending_clear_aspects, > >> > + iview->planes[0].isl.base_level, > >> > + iview->planes[0].isl.base_array_layer, > >> > + fb->layers, render_area, > >> > + att_state->clear_value.depthSt > >> encil.stencil); > >> > + } else { > >> > + anv_image_clear_depth_stencil(cmd_buffer, image, > >> > + > >> att_state->pending_clear_aspects, > >> > + att_state->aux_usage, > >> > + > >> iview->planes[0].isl.base_level, > >> > + > >> iview->planes[0].isl.base_array_layer, > >> > + fb->layers, render_area, > >> > + > >> att_state->clear_value.depthStencil.depth, > >> > + > >> att_state->clear_value.depthStencil.stencil); > >> > >> Do we need to do something special for multiview? I need to spend > >> some time getting familiar with that extension... > >> > > > > Yes. The igalia people have a bunch of multiview clear fixes. I asked > > them to hold off and rebase on this rework. > > > > Got it. With the memset fix applied, this patch is Reviewed-by: Nanley Chery <nanley.g.ch...@intel.com> > >> > + } > >> > + } > >> > + > >> > + att_state->pending_clear_aspects = 0; > >> > + } > >> > + > >> > + cmd_buffer_emit_depth_stencil(cmd_buffer); > >> > } > >> > > >> > static void > >> > -- > >> > 2.5.0.400.gff86faf > >> > > >> > _______________________________________________ > >> > mesa-dev mailing list > >> > mesa-dev@lists.freedesktop.org > >> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > >> > > > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev