Hi,
The patch series I have submitted handles the case of needing to resolve
texture surfaces when a draw (or compute) accesses a texture which is astc5x5.
As it is quite clear you understand the issue and know the code of i965 the
patch centers on, you are an excellent person to review the code.
Here are my comments of the patch posted:
1. it is essentially replication and moving around of the code of the patch
series posted earlier but missing various
important bits: preventing the sampler from using the auxiliary buffer
(this requires to modify surface state
sent in brw_wm_surfaces.c). It also does not cover blorp sufficiently
(blorp might read from an ASTC5x5
and there are more paths in blorp than blorp_surf_for_miptree() that
sample from surfaces.
2. using the check that the GPU is gen9 (and for that matter, using the name
gen9_ astc5x5_sampler_wa())
is not ideal; I would not be surprised that the bug might not be present
on various re-spins of Gen9 and/or
it might linger on to future Gens. I do NOT know; using a Boolean
assigned earlier makes the code more
future-ish proof.
3. the nature of GPU fragment dispatch is going to require a texture
invalidate even if the sampler only
have the bug in one direction; this is because a subslice is not
guaranteed to process fragments in any
order. The crux is that a single sampler serves an entire sub-slice which
has more than 1 EU. It is quite
possible that one EU has threads of a draw call ahead of the other and
depending on the timing, portions
of those fragments' coming after might be processed by the sampler of
those before of those fragments
coming before in batchbuffer order. Indeed a single EU might have threads
from separate draws as well.
A texture invalidate places a barrier in the pipeline preventing the
mixing (and means that efficiency of
GPU drops quite a bit with every texture invalidate sadly).
4. With 3 in mind, using the bit-masks is not a good idea as we want to then
enforce at the code level
that only one of the two is possible without texture invalidates.
-Kevin
-----Original Message-----
From: Topi Pohjolainen [mailto:[email protected]]
Sent: Monday, December 4, 2017 12:49 PM
To: [email protected]
Cc: Rogovin, Kevin <[email protected]>
Subject: [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug
This is just drafting some thoughts and only compile tested.
CC: "Rogovin, Kevin" <[email protected]>
---
src/mesa/drivers/dri/i965/brw_blorp.c | 8 +++++
src/mesa/drivers/dri/i965/brw_context.h | 10 ++++++
src/mesa/drivers/dri/i965/brw_draw.c | 54 ++++++++++++++++++++++++++++++++-
3 files changed, 71 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 680121b6ab..b3f84ab8ca 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -186,11 +186,19 @@ blorp_surf_for_miptree(struct brw_context *brw,
surf->aux_addr.buffer = mt->hiz_buf->bo;
surf->aux_addr.offset = mt->hiz_buf->offset;
}
+
+ if (!is_render_target && brw->screen->devinfo.gen == 9)
+ gen9_astc5x5_sampler_wa(brw, GEN9_ASTC5X5_WA_TEX_TYPE_AUX);
} else {
surf->aux_addr = (struct blorp_address) {
.buffer = NULL,
};
memset(&surf->clear_color, 0, sizeof(surf->clear_color));
+
+ if (!is_render_target && brw->screen->devinfo.gen == 9 &&
+ (mt->format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+ mt->format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5))
+ gen9_astc5x5_sampler_wa(brw,
+ GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5);
}
assert((surf->aux_usage == ISL_AUX_USAGE_NONE) ==
(surf->aux_addr.buffer == NULL)); diff --git
a/src/mesa/drivers/dri/i965/brw_context.h
b/src/mesa/drivers/dri/i965/brw_context.h
index 0670483806..44602c23c0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -165,6 +165,11 @@ enum brw_cache_id {
BRW_MAX_CACHE
};
+enum gen9_astc5x5_wa_tex_type {
+ GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0,
+ GEN9_ASTC5X5_WA_TEX_TYPE_AUX = 1 << 1,
+};
+
enum brw_state_id {
/* brw_cache_ids must come first - see brw_program_cache.c */
BRW_STATE_URB_FENCE = BRW_MAX_CACHE, @@ -1262,6 +1267,8 @@ struct
brw_context
*/
bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS];
+ enum gen9_astc5x5_wa_tex_type gen9_sampler_wa_tex_mask;
+
__DRIcontext *driContext;
struct intel_screen *screen;
};
@@ -1286,6 +1293,9 @@ void intel_update_renderbuffers(__DRIcontext *context,
__DRIdrawable *drawable); void
intel_prepare_render(struct brw_context *brw);
+void gen9_astc5x5_sampler_wa(struct brw_context *brw,
+ enum gen9_astc5x5_wa_tex_type curr_mask);
+
void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering);
void intel_resolve_for_dri2_flush(struct brw_context *brw, diff --git
a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 7e29dcfd4e..929f806eb3 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -371,6 +371,50 @@ intel_disable_rb_aux_buffer(struct brw_context *brw,
return found;
}
+static enum gen9_astc5x5_wa_tex_type
+gen9_astc5x5_wa_get_tex_mask(const struct brw_context *brw) {
+ enum gen9_astc5x5_wa_tex_type mask = 0;
+ const struct gl_context *ctx = &brw->ctx;
+ const struct intel_texture_object *tex_obj;
+
+ const int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
+ for (int i = 0; i <= maxEnabledUnit; i++) {
+ if (!ctx->Texture.Unit[i]._Current)
+ continue;
+ tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
+ if (!tex_obj || !tex_obj->mt)
+ continue;
+
+ if (tex_obj->mt->aux_usage != ISL_AUX_USAGE_NONE)
+ mask |= GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
+
+ if (tex_obj->_Format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+ tex_obj->_Format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5)
+ mask |= GEN9_ASTC5X5_WA_TEX_TYPE_AUX;
+ }
+
+ return mask;
+}
+
+/* TODO: Do we actually need this both ways: astc5x5 followed by aux
+ * and vice-versa? Or is only one direction problematic?
+ */
+void
+gen9_astc5x5_sampler_wa(struct brw_context *brw,
+ enum gen9_astc5x5_wa_tex_type curr_mask) {
+ if ((brw->gen9_sampler_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
+ (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX))
+ brw_emit_pipe_control_flush(brw,
+PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+
+ if ((brw->gen9_sampler_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) &&
+ (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+
+ brw->gen9_sampler_wa_tex_mask = curr_mask; }
+
/**
* \brief Resolve buffers before drawing.
*
@@ -383,6 +427,12 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool
rendering)
struct gl_context *ctx = &brw->ctx;
struct intel_texture_object *tex_obj;
+ const enum gen9_astc5x5_wa_tex_type curr_wa_mask =
+ (brw->screen->devinfo.gen == 9) ?
+ gen9_astc5x5_wa_get_tex_mask(brw) : 0;
+
+ if (brw->screen->devinfo.gen == 9)
+ gen9_astc5x5_sampler_wa(brw, curr_wa_mask);
+
memset(brw->draw_aux_buffer_disabled, 0,
sizeof(brw->draw_aux_buffer_disabled));
@@ -413,6 +463,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool
rendering)
num_layers = INTEL_REMAINING_LAYERS;
}
+ const bool sampler_wa_disable_aux =
+ curr_wa_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
const bool disable_aux = rendering &&
intel_disable_rb_aux_buffer(brw, tex_obj->mt, min_level, num_levels,
"for sampling"); @@ -420,7 +472,7 @@
brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering)
intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
min_level, num_levels,
min_layer, num_layers,
- disable_aux);
+ disable_aux ||
+ sampler_wa_disable_aux);
brw_cache_flush_for_read(brw, tex_obj->mt->bo);
--
2.14.1
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev