On Thu, Sep 06, 2018 at 03:50:42PM -0500, Jason Ekstrand wrote:
> From: Topi Pohjolainen <[email protected]>
> 
> gen9 hardware has a bug in the sampler cache that can cause GPU hangs
> whenever an texture with aux compression enabled is in the sampler cache
> together with an ASTC5x5 texture.  Because we can't control what the
> client binds at any given time, we have two options: resolve the CCS or
> decompresss the ASTC.  Doing a CCS or HiZ resolve is far less drastic
> and will likely have a smaller performance impact.
> 
> Co-authored-by: Jason Ekstrand <[email protected]>

To me it looks you have pretty much re-written all the relevant parts, and I
don't see any reason why I shouldn't review.

Thanks for picking this up:

Reviewed-by: Topi Pohjolainen <[email protected]>

> ---
>  src/mesa/drivers/dri/i965/brw_blorp.c         |  6 +-
>  src/mesa/drivers/dri/i965/brw_context.h       | 11 +++
>  src/mesa/drivers/dri/i965/brw_draw.c          | 95 ++++++++++++++++++-
>  .../drivers/dri/i965/brw_wm_surface_state.c   |  6 +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 19 +++-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  6 +-
>  6 files changed, 131 insertions(+), 12 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
> b/src/mesa/drivers/dri/i965/brw_blorp.c
> index 7476cee43a4..ad747e0766e 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -187,6 +187,9 @@ blorp_surf_for_miptree(struct brw_context *brw,
>     assert((surf->aux_usage == ISL_AUX_USAGE_NONE) ==
>            (surf->aux_addr.buffer == NULL));
>  
> +   if (!is_render_target && brw->screen->devinfo.gen == 9)
> +      gen9_apply_single_tex_astc5x5_wa(brw, mt->format, surf->aux_usage);
> +
>     /* ISL wants real levels, not offset ones. */
>     *level -= mt->first_level;
>  }
> @@ -382,7 +385,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>     enum isl_format src_isl_format =
>        brw_blorp_to_isl_format(brw, src_format, false);
>     enum isl_aux_usage src_aux_usage =
> -      intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format);
> +      intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format,
> +                                      0 /* The astc5x5 WA isn't needed */);
>     /* We do format workarounds for some depth formats so we can't reliably
>      * sample with HiZ.  One of these days, we should fix that.
>      */
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index d3b96953467..bf2cddebdc6 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -168,6 +168,11 @@ enum brw_cache_id {
>     BRW_MAX_CACHE
>  };
>  
> +enum gen9_astc5x5_wa_tex_type {
> +   GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0,
> +   GEN9_ASTC5X5_WA_TEX_TYPE_AUX     = 1 << 1,
> +};
> +
>  enum brw_state_id {
>     /* brw_cache_ids must come first - see brw_program_cache.c */
>     BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
> @@ -1326,6 +1331,8 @@ struct brw_context
>      */
>     enum isl_aux_usage draw_aux_usage[MAX_DRAW_BUFFERS];
>  
> +   enum gen9_astc5x5_wa_tex_type gen9_astc5x5_wa_tex_mask;
> +
>     __DRIcontext *driContext;
>     struct intel_screen *screen;
>  };
> @@ -1350,6 +1357,10 @@ void intel_update_renderbuffers(__DRIcontext *context,
>                                  __DRIdrawable *drawable);
>  void intel_prepare_render(struct brw_context *brw);
>  
> +void gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw,
> +                                      mesa_format format,
> +                                      enum isl_aux_usage aux_usage);
> +
>  void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
>                                  bool *draw_aux_buffer_disabled);
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
> b/src/mesa/drivers/dri/i965/brw_draw.c
> index 71461d7b0a7..8536c040109 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -378,6 +378,68 @@ intel_disable_rb_aux_buffer(struct brw_context *brw,
>     return found;
>  }
>  
> +/** Implement the ASTC 5x5 sampler workaround
> + *
> + * Gen9 sampling hardware has a bug where an ASTC 5x5 compressed surface
> + * cannot live in the sampler cache at the same time as an aux compressed
> + * surface.  In order to work around the bug we have to stall rendering with 
> a
> + * CS and pixel scoreboard stall (implicit in the CS stall) and invalidate 
> the
> + * texture cache whenever one of ASTC 5x5 or aux compressed may be in the
> + * sampler cache and we're about to render with something which samples from
> + * the other.
> + *
> + * In the case of a single shader which textures from both ASTC 5x5 and
> + * a texture which is CCS or HiZ compressed, we have to resolve the aux
> + * compressed texture prior to rendering.  This second part is handled in
> + * brw_predraw_resolve_inputs() below.
> + *
> + * We have observed this issue to affect CCS and HiZ sampling but whether or
> + * not it also affects MCS is unknown.  Because MCS has no concept of a
> + * resolve (and doing one would be stupid expensive), we choose to simply
> + * ignore the possibility and hope for the best.
> + */
> +static void
> +gen9_apply_astc5x5_wa_flush(struct brw_context *brw,
> +                            enum gen9_astc5x5_wa_tex_type curr_mask)
> +{
> +   assert(brw->screen->devinfo.gen == 9);
> +
> +   if (((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
> +        (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX)) ||
> +       ((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) &&
> +        (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))) {
> +      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
> +      brw_emit_pipe_control_flush(brw, 
> PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
> +   }
> +
> +   brw->gen9_astc5x5_wa_tex_mask = curr_mask;
> +}
> +
> +static enum gen9_astc5x5_wa_tex_type
> +gen9_astc5x5_wa_bits(mesa_format format, enum isl_aux_usage aux_usage)
> +{
> +   if (aux_usage != ISL_AUX_USAGE_NONE &&
> +       aux_usage != ISL_AUX_USAGE_MCS)
> +      return GEN9_ASTC5X5_WA_TEX_TYPE_AUX;
> +
> +   if (format == MESA_FORMAT_RGBA_ASTC_5x5 ||
> +       format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5)
> +      return GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
> +
> +   return 0;
> +}
> +
> +/* Helper for the gen9 ASTC 5x5 workaround.  This version exists for BLORP's
> + * use-cases where only a single texture is bound.
> + */
> +void
> +gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw,
> +                                 mesa_format format,
> +                                 enum isl_aux_usage aux_usage)
> +{
> +   gen9_apply_astc5x5_wa_flush(brw, gen9_astc5x5_wa_bits(format, aux_usage));
> +}
> +
>  static void
>  mark_textures_used_for_txf(BITSET_WORD *used_for_txf,
>                             const struct gl_program *prog)
> @@ -417,8 +479,30 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
> rendering,
>        mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current);
>     }
>  
> -   /* Resolve depth buffer and render cache of each enabled texture. */
>     int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
> +
> +   enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits = 0;
> +   if (brw->screen->devinfo.gen == 9) {
> +      /* In order to properly implement the ASTC 5x5 workaround for an
> +       * arbitrary draw or dispatch call, we have to walk the entire list of
> +       * textures looking for ASTC 5x5.  If there is any ASTC 5x5 in this 
> draw
> +       * call, all aux compressed textures must be resolved and have aux
> +       * compression disabled while sampling.
> +       */
> +      for (int i = 0; i <= maxEnabledUnit; i++) {
> +         if (!ctx->Texture.Unit[i]._Current)
> +            continue;
> +         tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
> +         if (!tex_obj || !tex_obj->mt)
> +            continue;
> +
> +         astc5x5_wa_bits |= gen9_astc5x5_wa_bits(tex_obj->_Format,
> +                                                 tex_obj->mt->aux_usage);
> +      }
> +      gen9_apply_astc5x5_wa_flush(brw, astc5x5_wa_bits);
> +   }
> +
> +   /* Resolve depth buffer and render cache of each enabled texture. */
>     for (int i = 0; i <= maxEnabledUnit; i++) {
>        if (!ctx->Texture.Unit[i]._Current)
>        continue;
> @@ -452,7 +536,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
> rendering,
>  
>        intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
>                                      min_level, num_levels,
> -                                    min_layer, num_layers);
> +                                    min_layer, num_layers,
> +                                    astc5x5_wa_bits);
>  
>        /* If any programs are using it with texelFetch, we may need to also do
>         * a prepare with an sRGB format to ensure texelFetch works "properly".
> @@ -463,7 +548,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
> rendering,
>           if (txf_format != view_format) {
>              intel_miptree_prepare_texture(brw, tex_obj->mt, txf_format,
>                                            min_level, num_levels,
> -                                          min_layer, num_layers);
> +                                          min_layer, num_layers,
> +                                          astc5x5_wa_bits);
>           }
>        }
>  
> @@ -535,7 +621,8 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw,
>           if (irb) {
>              intel_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format,
>                                            irb->mt_level, 1,
> -                                          irb->mt_layer, irb->layer_count);
> +                                          irb->mt_layer, irb->layer_count,
> +                                          brw->gen9_astc5x5_wa_tex_mask);
>           }
>        }
>     }
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 42af41aca32..944762ec46b 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -603,7 +603,8 @@ static void brw_update_texture_surface(struct gl_context 
> *ctx,
>           view.usage |= ISL_SURF_USAGE_CUBE_BIT;
>  
>        enum isl_aux_usage aux_usage =
> -         intel_miptree_texture_aux_usage(brw, mt, format);
> +         intel_miptree_texture_aux_usage(brw, mt, format,
> +                                         brw->gen9_astc5x5_wa_tex_mask);
>  
>        brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
>                               surf_offset, surf_index,
> @@ -1107,7 +1108,8 @@ update_renderbuffer_read_surfaces(struct brw_context 
> *brw)
>              };
>  
>              enum isl_aux_usage aux_usage =
> -               intel_miptree_texture_aux_usage(brw, irb->mt, format);
> +               intel_miptree_texture_aux_usage(brw, irb->mt, format,
> +                                               
> brw->gen9_astc5x5_wa_tex_mask);
>              if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE)
>                 aux_usage = ISL_AUX_USAGE_NONE;
>  
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 983f145afc9..36681352ba7 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -2542,8 +2542,19 @@ can_texture_with_ccs(struct brw_context *brw,
>  enum isl_aux_usage
>  intel_miptree_texture_aux_usage(struct brw_context *brw,
>                                  struct intel_mipmap_tree *mt,
> -                                enum isl_format view_format)
> +                                enum isl_format view_format,
> +                                enum gen9_astc5x5_wa_tex_type 
> astc5x5_wa_bits)
>  {
> +   assert(brw->screen->devinfo.gen == 9 || astc5x5_wa_bits == 0);
> +
> +   /* On gen9, ASTC 5x5 textures cannot live in the sampler cache along side
> +    * CCS or HiZ compressed textures.  See gen9_apply_astc5x5_wa_flush() for
> +    * details.
> +    */
> +   if ((astc5x5_wa_bits & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
> +       mt->aux_usage != ISL_AUX_USAGE_MCS)
> +      return ISL_AUX_USAGE_NONE;
> +
>     switch (mt->aux_usage) {
>     case ISL_AUX_USAGE_HIZ:
>        if (intel_miptree_sample_with_hiz(brw, mt))
> @@ -2601,10 +2612,12 @@ intel_miptree_prepare_texture(struct brw_context *brw,
>                                struct intel_mipmap_tree *mt,
>                                enum isl_format view_format,
>                                uint32_t start_level, uint32_t num_levels,
> -                              uint32_t start_layer, uint32_t num_layers)
> +                              uint32_t start_layer, uint32_t num_layers,
> +                              enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits)
>  {
>     enum isl_aux_usage aux_usage =
> -      intel_miptree_texture_aux_usage(brw, mt, view_format);
> +      intel_miptree_texture_aux_usage(brw, mt, view_format, astc5x5_wa_bits);
> +
>     bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;
>  
>     /* Clear color is specified as ints or floats and the conversion is done 
> by
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> index bb7df7ad235..08c129a4b8b 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> @@ -621,13 +621,15 @@ intel_miptree_access_raw(struct brw_context *brw,
>  enum isl_aux_usage
>  intel_miptree_texture_aux_usage(struct brw_context *brw,
>                                  struct intel_mipmap_tree *mt,
> -                                enum isl_format view_format);
> +                                enum isl_format view_format,
> +                                enum gen9_astc5x5_wa_tex_type 
> astc5x5_wa_bits);
>  void
>  intel_miptree_prepare_texture(struct brw_context *brw,
>                                struct intel_mipmap_tree *mt,
>                                enum isl_format view_format,
>                                uint32_t start_level, uint32_t num_levels,
> -                              uint32_t start_layer, uint32_t num_layers);
> +                              uint32_t start_layer, uint32_t num_layers,
> +                              enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits);
>  void
>  intel_miptree_prepare_image(struct brw_context *brw,
>                              struct intel_mipmap_tree *mt);
> -- 
> 2.17.1
> 
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to