On Thu, Sep 06, 2018 at 03:50:42PM -0500, Jason Ekstrand wrote: > From: Topi Pohjolainen <[email protected]> > > gen9 hardware has a bug in the sampler cache that can cause GPU hangs > whenever an texture with aux compression enabled is in the sampler cache > together with an ASTC5x5 texture. Because we can't control what the > client binds at any given time, we have two options: resolve the CCS or > decompresss the ASTC. Doing a CCS or HiZ resolve is far less drastic > and will likely have a smaller performance impact. > > Co-authored-by: Jason Ekstrand <[email protected]>
To me it looks you have pretty much re-written all the relevant parts, and I don't see any reason why I shouldn't review. Thanks for picking this up: Reviewed-by: Topi Pohjolainen <[email protected]> > --- > src/mesa/drivers/dri/i965/brw_blorp.c | 6 +- > src/mesa/drivers/dri/i965/brw_context.h | 11 +++ > src/mesa/drivers/dri/i965/brw_draw.c | 95 ++++++++++++++++++- > .../drivers/dri/i965/brw_wm_surface_state.c | 6 +- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 19 +++- > src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 6 +- > 6 files changed, 131 insertions(+), 12 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c > b/src/mesa/drivers/dri/i965/brw_blorp.c > index 7476cee43a4..ad747e0766e 100644 > --- a/src/mesa/drivers/dri/i965/brw_blorp.c > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c > @@ -187,6 +187,9 @@ blorp_surf_for_miptree(struct brw_context *brw, > assert((surf->aux_usage == ISL_AUX_USAGE_NONE) == > (surf->aux_addr.buffer == NULL)); > > + if (!is_render_target && brw->screen->devinfo.gen == 9) > + gen9_apply_single_tex_astc5x5_wa(brw, mt->format, surf->aux_usage); > + > /* ISL wants real levels, not offset ones. */ > *level -= mt->first_level; > } > @@ -382,7 +385,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw, > enum isl_format src_isl_format = > brw_blorp_to_isl_format(brw, src_format, false); > enum isl_aux_usage src_aux_usage = > - intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format); > + intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format, > + 0 /* The astc5x5 WA isn't needed */); > /* We do format workarounds for some depth formats so we can't reliably > * sample with HiZ. One of these days, we should fix that. > */ > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index d3b96953467..bf2cddebdc6 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -168,6 +168,11 @@ enum brw_cache_id { > BRW_MAX_CACHE > }; > > +enum gen9_astc5x5_wa_tex_type { > + GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0, > + GEN9_ASTC5X5_WA_TEX_TYPE_AUX = 1 << 1, > +}; > + > enum brw_state_id { > /* brw_cache_ids must come first - see brw_program_cache.c */ > BRW_STATE_URB_FENCE = BRW_MAX_CACHE, > @@ -1326,6 +1331,8 @@ struct brw_context > */ > enum isl_aux_usage draw_aux_usage[MAX_DRAW_BUFFERS]; > > + enum gen9_astc5x5_wa_tex_type gen9_astc5x5_wa_tex_mask; > + > __DRIcontext *driContext; > struct intel_screen *screen; > }; > @@ -1350,6 +1357,10 @@ void intel_update_renderbuffers(__DRIcontext *context, > __DRIdrawable *drawable); > void intel_prepare_render(struct brw_context *brw); > > +void gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw, > + mesa_format format, > + enum isl_aux_usage aux_usage); > + > void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, > bool *draw_aux_buffer_disabled); > > diff --git a/src/mesa/drivers/dri/i965/brw_draw.c > b/src/mesa/drivers/dri/i965/brw_draw.c > index 71461d7b0a7..8536c040109 100644 > --- a/src/mesa/drivers/dri/i965/brw_draw.c > +++ b/src/mesa/drivers/dri/i965/brw_draw.c > @@ -378,6 +378,68 @@ intel_disable_rb_aux_buffer(struct brw_context *brw, > return found; > } > > +/** Implement the ASTC 5x5 sampler workaround > + * > + * Gen9 sampling hardware has a bug where an ASTC 5x5 compressed surface > + * cannot live in the sampler cache at the same time as an aux compressed > + * surface. In order to work around the bug we have to stall rendering with > a > + * CS and pixel scoreboard stall (implicit in the CS stall) and invalidate > the > + * texture cache whenever one of ASTC 5x5 or aux compressed may be in the > + * sampler cache and we're about to render with something which samples from > + * the other. > + * > + * In the case of a single shader which textures from both ASTC 5x5 and > + * a texture which is CCS or HiZ compressed, we have to resolve the aux > + * compressed texture prior to rendering. This second part is handled in > + * brw_predraw_resolve_inputs() below. > + * > + * We have observed this issue to affect CCS and HiZ sampling but whether or > + * not it also affects MCS is unknown. Because MCS has no concept of a > + * resolve (and doing one would be stupid expensive), we choose to simply > + * ignore the possibility and hope for the best. > + */ > +static void > +gen9_apply_astc5x5_wa_flush(struct brw_context *brw, > + enum gen9_astc5x5_wa_tex_type curr_mask) > +{ > + assert(brw->screen->devinfo.gen == 9); > + > + if (((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && > + (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX)) || > + ((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) && > + (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))) { > + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL); > + brw_emit_pipe_control_flush(brw, > PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); > + } > + > + brw->gen9_astc5x5_wa_tex_mask = curr_mask; > +} > + > +static enum gen9_astc5x5_wa_tex_type > +gen9_astc5x5_wa_bits(mesa_format format, enum isl_aux_usage aux_usage) > +{ > + if (aux_usage != ISL_AUX_USAGE_NONE && > + aux_usage != ISL_AUX_USAGE_MCS) > + return GEN9_ASTC5X5_WA_TEX_TYPE_AUX; > + > + if (format == MESA_FORMAT_RGBA_ASTC_5x5 || > + format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5) > + return GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5; > + > + return 0; > +} > + > +/* Helper for the gen9 ASTC 5x5 workaround. This version exists for BLORP's > + * use-cases where only a single texture is bound. > + */ > +void > +gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw, > + mesa_format format, > + enum isl_aux_usage aux_usage) > +{ > + gen9_apply_astc5x5_wa_flush(brw, gen9_astc5x5_wa_bits(format, aux_usage)); > +} > + > static void > mark_textures_used_for_txf(BITSET_WORD *used_for_txf, > const struct gl_program *prog) > @@ -417,8 +479,30 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool > rendering, > mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current); > } > > - /* Resolve depth buffer and render cache of each enabled texture. */ > int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit; > + > + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits = 0; > + if (brw->screen->devinfo.gen == 9) { > + /* In order to properly implement the ASTC 5x5 workaround for an > + * arbitrary draw or dispatch call, we have to walk the entire list of > + * textures looking for ASTC 5x5. If there is any ASTC 5x5 in this > draw > + * call, all aux compressed textures must be resolved and have aux > + * compression disabled while sampling. > + */ > + for (int i = 0; i <= maxEnabledUnit; i++) { > + if (!ctx->Texture.Unit[i]._Current) > + continue; > + tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); > + if (!tex_obj || !tex_obj->mt) > + continue; > + > + astc5x5_wa_bits |= gen9_astc5x5_wa_bits(tex_obj->_Format, > + tex_obj->mt->aux_usage); > + } > + gen9_apply_astc5x5_wa_flush(brw, astc5x5_wa_bits); > + } > + > + /* Resolve depth buffer and render cache of each enabled texture. */ > for (int i = 0; i <= maxEnabledUnit; i++) { > if (!ctx->Texture.Unit[i]._Current) > continue; > @@ -452,7 +536,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool > rendering, > > intel_miptree_prepare_texture(brw, tex_obj->mt, view_format, > min_level, num_levels, > - min_layer, num_layers); > + min_layer, num_layers, > + astc5x5_wa_bits); > > /* If any programs are using it with texelFetch, we may need to also do > * a prepare with an sRGB format to ensure texelFetch works "properly". > @@ -463,7 +548,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool > rendering, > if (txf_format != view_format) { > intel_miptree_prepare_texture(brw, tex_obj->mt, txf_format, > min_level, num_levels, > - min_layer, num_layers); > + min_layer, num_layers, > + astc5x5_wa_bits); > } > } > > @@ -535,7 +621,8 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw, > if (irb) { > intel_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format, > irb->mt_level, 1, > - irb->mt_layer, irb->layer_count); > + irb->mt_layer, irb->layer_count, > + brw->gen9_astc5x5_wa_tex_mask); > } > } > } > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > index 42af41aca32..944762ec46b 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > @@ -603,7 +603,8 @@ static void brw_update_texture_surface(struct gl_context > *ctx, > view.usage |= ISL_SURF_USAGE_CUBE_BIT; > > enum isl_aux_usage aux_usage = > - intel_miptree_texture_aux_usage(brw, mt, format); > + intel_miptree_texture_aux_usage(brw, mt, format, > + brw->gen9_astc5x5_wa_tex_mask); > > brw_emit_surface_state(brw, mt, mt->target, view, aux_usage, > surf_offset, surf_index, > @@ -1107,7 +1108,8 @@ update_renderbuffer_read_surfaces(struct brw_context > *brw) > }; > > enum isl_aux_usage aux_usage = > - intel_miptree_texture_aux_usage(brw, irb->mt, format); > + intel_miptree_texture_aux_usage(brw, irb->mt, format, > + > brw->gen9_astc5x5_wa_tex_mask); > if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE) > aux_usage = ISL_AUX_USAGE_NONE; > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index 983f145afc9..36681352ba7 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -2542,8 +2542,19 @@ can_texture_with_ccs(struct brw_context *brw, > enum isl_aux_usage > intel_miptree_texture_aux_usage(struct brw_context *brw, > struct intel_mipmap_tree *mt, > - enum isl_format view_format) > + enum isl_format view_format, > + enum gen9_astc5x5_wa_tex_type > astc5x5_wa_bits) > { > + assert(brw->screen->devinfo.gen == 9 || astc5x5_wa_bits == 0); > + > + /* On gen9, ASTC 5x5 textures cannot live in the sampler cache along side > + * CCS or HiZ compressed textures. See gen9_apply_astc5x5_wa_flush() for > + * details. > + */ > + if ((astc5x5_wa_bits & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && > + mt->aux_usage != ISL_AUX_USAGE_MCS) > + return ISL_AUX_USAGE_NONE; > + > switch (mt->aux_usage) { > case ISL_AUX_USAGE_HIZ: > if (intel_miptree_sample_with_hiz(brw, mt)) > @@ -2601,10 +2612,12 @@ intel_miptree_prepare_texture(struct brw_context *brw, > struct intel_mipmap_tree *mt, > enum isl_format view_format, > uint32_t start_level, uint32_t num_levels, > - uint32_t start_layer, uint32_t num_layers) > + uint32_t start_layer, uint32_t num_layers, > + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits) > { > enum isl_aux_usage aux_usage = > - intel_miptree_texture_aux_usage(brw, mt, view_format); > + intel_miptree_texture_aux_usage(brw, mt, view_format, astc5x5_wa_bits); > + > bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE; > > /* Clear color is specified as ints or floats and the conversion is done > by > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > index bb7df7ad235..08c129a4b8b 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > @@ -621,13 +621,15 @@ intel_miptree_access_raw(struct brw_context *brw, > enum isl_aux_usage > intel_miptree_texture_aux_usage(struct brw_context *brw, > struct intel_mipmap_tree *mt, > - enum isl_format view_format); > + enum isl_format view_format, > + enum gen9_astc5x5_wa_tex_type > astc5x5_wa_bits); > void > intel_miptree_prepare_texture(struct brw_context *brw, > struct intel_mipmap_tree *mt, > enum isl_format view_format, > uint32_t start_level, uint32_t num_levels, > - uint32_t start_layer, uint32_t num_layers); > + uint32_t start_layer, uint32_t num_layers, > + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits); > void > intel_miptree_prepare_image(struct brw_context *brw, > struct intel_mipmap_tree *mt); > -- > 2.17.1 > _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
