Module: Mesa Branch: main Commit: 029c686c6dbe37639bf6d391bb9361488a6a5ea6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=029c686c6dbe37639bf6d391bb9361488a6a5ea6
Author: Alyssa Rosenzweig <[email protected]> Date: Fri Feb 17 18:24:38 2023 -0500 asahi: Implement color masks with masked stores Blend states can require masking colour. Currently, this is handled by nir_lower_blend, which lowers masks to a read-modify-write operation as required on Mali hardware. However, our "tilebuffer store" instruction supports a write mask, allowing us to write only a subset of channels to the tilebuffer. It's more efficient to use that than to emit pointless tilebuffer loads. Note that even without tilebuffer loads, non-opaque masks don't work with opaque pass types. Here, we handle this with a translucent pass type, which gets HSR to do the right thing and is consistent with the pass type used previously. However, it's a bit heavy-handed -- Apple manages to use an opaque pass type with masking but with some unknown HSR fields twiddled. IMO reverse-engineering those details shouldn't block this because this gets us closer to optimal (just not all the way there) and is strictly better than what we had before. 
Signed-off-by: Alyssa Rosenzweig <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21431> --- src/asahi/lib/agx_meta.c | 2 +- src/asahi/lib/agx_nir_lower_tilebuffer.c | 58 +++++++++++++++++++++++++++----- src/asahi/lib/agx_tilebuffer.c | 8 +++++ src/asahi/lib/agx_tilebuffer.h | 6 +++- src/gallium/drivers/asahi/agx_state.c | 32 ++++++++++++++++-- 5 files changed, 94 insertions(+), 12 deletions(-) diff --git a/src/asahi/lib/agx_meta.c b/src/asahi/lib/agx_meta.c index a0dcdc2421f..bbd862175ca 100644 --- a/src/asahi/lib/agx_meta.c +++ b/src/asahi/lib/agx_meta.c @@ -19,7 +19,7 @@ agx_compile_meta_shader(struct agx_meta_cache *cache, nir_shader *shader, agx_preprocess_nir(shader); if (tib) - agx_nir_lower_tilebuffer(shader, tib); + agx_nir_lower_tilebuffer(shader, tib, NULL, NULL); struct agx_meta_shader *res = rzalloc(cache->ht, struct agx_meta_shader); agx_compile_shader_nir(shader, key, NULL, &binary, &res->info); diff --git a/src/asahi/lib/agx_nir_lower_tilebuffer.c b/src/asahi/lib/agx_nir_lower_tilebuffer.c index 3d49c2841cd..43a295b413f 100644 --- a/src/asahi/lib/agx_nir_lower_tilebuffer.c +++ b/src/asahi/lib/agx_nir_lower_tilebuffer.c @@ -3,6 +3,7 @@ * SPDX-License-Identifier: MIT */ +#include "compiler/agx_internal_formats.h" #include "agx_nir_format_helpers.h" #include "agx_tilebuffer.h" #include "nir.h" @@ -10,6 +11,12 @@ #define ALL_SAMPLES 0xFF +struct ctx { + struct agx_tilebuffer_layout *tib; + uint8_t *colormasks; + bool *translucent; +}; + static bool tib_filter(const nir_instr *instr, UNUSED const void *_) { @@ -29,7 +36,8 @@ tib_filter(const nir_instr *instr, UNUSED const void *_) static nir_ssa_def * tib_impl(nir_builder *b, nir_instr *instr, void *data) { - struct agx_tilebuffer_layout *tib = data; + struct ctx *ctx = data; + struct agx_tilebuffer_layout *tib = ctx->tib; nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); nir_io_semantics sem = nir_intrinsic_io_semantics(intr); @@ -41,10 +49,38 @@ 
tib_impl(nir_builder *b, nir_instr *instr, void *data) unsigned comps = util_format_get_nr_components(logical_format); if (intr->intrinsic == nir_intrinsic_store_output) { + /* Only write components that actually exist */ + uint16_t write_mask = BITFIELD_MASK(comps); + /* Delete stores to nonexistant render targets */ if (logical_format == PIPE_FORMAT_NONE) return NIR_LOWER_INSTR_PROGRESS_REPLACE; + /* Only write colours masked by the blend state */ + if (ctx->colormasks) + write_mask &= ctx->colormasks[rt]; + + /* Masked stores require a translucent pass type */ + if (write_mask != BITFIELD_MASK(comps)) { + assert(ctx->translucent != NULL && + "colour masking requires translucency"); + + assert(agx_internal_format_supports_mask(format) && + "write mask but format cannot be masked"); + + *(ctx->translucent) = true; + } + + /* But we ignore the NIR write mask for that, since it's basically an + * optimization hint. + */ + if (agx_internal_format_supports_mask(format)) + write_mask &= nir_intrinsic_write_mask(intr); + + /* Delete stores that are entirely masked out */ + if (!write_mask) + return NIR_LOWER_INSTR_PROGRESS_REPLACE; + nir_ssa_def *value = intr->src[0].ssa; /* Trim to format as required by hardware */ @@ -60,11 +96,9 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data) value = nir_f2f32(b, value); } - nir_store_local_pixel_agx( - b, value, nir_imm_intN_t(b, ALL_SAMPLES, 16), - .base = tib->offset_B[rt], - .write_mask = nir_intrinsic_write_mask(intr) & BITFIELD_MASK(comps), - .format = format); + nir_store_local_pixel_agx(b, value, nir_imm_intN_t(b, ALL_SAMPLES, 16), + .base = tib->offset_B[rt], + .write_mask = write_mask, .format = format); return NIR_LOWER_INSTR_PROGRESS_REPLACE; } else { @@ -101,8 +135,16 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data) } bool -agx_nir_lower_tilebuffer(nir_shader *shader, struct agx_tilebuffer_layout *tib) +agx_nir_lower_tilebuffer(nir_shader *shader, struct agx_tilebuffer_layout *tib, + uint8_t 
*colormasks, bool *translucent) { assert(shader->info.stage == MESA_SHADER_FRAGMENT); - return nir_shader_lower_instructions(shader, tib_filter, tib_impl, tib); + + struct ctx ctx = { + .tib = tib, + .colormasks = colormasks, + .translucent = translucent, + }; + + return nir_shader_lower_instructions(shader, tib_filter, tib_impl, &ctx); } diff --git a/src/asahi/lib/agx_tilebuffer.c b/src/asahi/lib/agx_tilebuffer.c index 81df1d25636..0c724881833 100644 --- a/src/asahi/lib/agx_tilebuffer.c +++ b/src/asahi/lib/agx_tilebuffer.c @@ -5,6 +5,7 @@ #include "agx_tilebuffer.h" #include <assert.h> +#include "compiler/agx_internal_formats.h" #include "util/format/u_format.h" #include "agx_formats.h" #include "agx_usc.h" @@ -77,6 +78,13 @@ agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt) return agx_pixel_format[tib->logical_format[rt]].internal; } +bool +agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout *tib, unsigned rt) +{ + enum pipe_format fmt = agx_tilebuffer_physical_format(tib, rt); + return agx_internal_format_supports_mask((enum agx_internal_formats)fmt); +} + static unsigned agx_shared_layout_from_tile_size(struct agx_tile_size t) { diff --git a/src/asahi/lib/agx_tilebuffer.h b/src/asahi/lib/agx_tilebuffer.h index 2a120037a9e..4e57e265ca6 100644 --- a/src/asahi/lib/agx_tilebuffer.h +++ b/src/asahi/lib/agx_tilebuffer.h @@ -47,7 +47,8 @@ agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs, uint8_t nr_samples); bool agx_nir_lower_tilebuffer(struct nir_shader *shader, - struct agx_tilebuffer_layout *tib); + struct agx_tilebuffer_layout *tib, + uint8_t *colormasks, bool *translucent); void agx_usc_tilebuffer(struct agx_usc_builder *b, struct agx_tilebuffer_layout *tib); @@ -57,6 +58,9 @@ uint32_t agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib); enum pipe_format agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt); +bool agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout 
*tib, + unsigned rt); + #ifdef __cplusplus } /* extern C */ #endif diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index de26f576dd0..3fa69f72783 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1370,6 +1370,8 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so, nir_shader *nir = nir_shader_clone(NULL, so->nir); + bool force_translucent = false; + if (nir->info.stage == MESA_SHADER_VERTEX) { struct asahi_vs_shader_key *key = &key_->vs; @@ -1393,9 +1395,25 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so, opts.format[i] = key->rt_formats[i]; memcpy(opts.rt, key->blend.rt, sizeof(opts.rt)); - NIR_PASS_V(nir, nir_lower_blend, &opts); - NIR_PASS_V(nir, agx_nir_lower_tilebuffer, &tib); + /* It's more efficient to use masked stores (with + * agx_nir_lower_tilebuffer) than to emulate colour masking with + * nir_lower_blend. + */ + uint8_t colormasks[PIPE_MAX_COLOR_BUFS] = {0}; + + for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) { + if (agx_tilebuffer_supports_mask(&tib, i)) { + colormasks[i] = key->blend.rt[i].colormask; + opts.rt[i].colormask = BITFIELD_MASK(4); + } else { + colormasks[i] = BITFIELD_MASK(4); + } + } + + NIR_PASS_V(nir, nir_lower_blend, &opts); + NIR_PASS_V(nir, agx_nir_lower_tilebuffer, &tib, colormasks, + &force_translucent); if (key->sprite_coord_enable) { NIR_PASS_V(nir, nir_lower_texcoord_replace_late, @@ -1415,6 +1433,16 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so, agx_compile_shader_nir(nir, &base_key, debug, &binary, &compiled->info); + /* reads_tib => Translucent pass type */ + compiled->info.reads_tib |= force_translucent; + + /* Could be optimized to use non-translucent pass types with the appropriate + * HSR configuration, but that mechanism is not yet understood. Warn that + * we're leaving perf on the table when used. 
+ */ + if (force_translucent) + perf_debug(dev, "Translucency forced due to colour masking"); + if (binary.size) { compiled->bo = agx_bo_create(dev, binary.size, AGX_BO_EXEC | AGX_BO_LOW_VA, "Executable");
