Module: Mesa Branch: main Commit: 56d30bf591272ac3708336bb918ceb35878d1388 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=56d30bf591272ac3708336bb918ceb35878d1388
Author: Giancarlo Devich <[email protected]> Date: Fri Feb 24 11:50:57 2023 -0800 d3d12: Track max varying slot, set and compare less bytes Often, the full range of available slots don't need to participate in the comparison or be zeroed out. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21527> --- src/gallium/drivers/d3d12/d3d12_compiler.cpp | 66 +++++++++++++++++----------- src/gallium/drivers/d3d12/d3d12_compiler.h | 1 + 2 files changed, 41 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/d3d12/d3d12_compiler.cpp b/src/gallium/drivers/d3d12/d3d12_compiler.cpp index b56b4986102..cca8cf3699f 100644 --- a/src/gallium/drivers/d3d12/d3d12_compiler.cpp +++ b/src/gallium/drivers/d3d12/d3d12_compiler.cpp @@ -549,7 +549,8 @@ static void fill_varyings(struct d3d12_varying_info *info, nir_shader *s, nir_variable_mode modes, uint64_t mask, bool patch) { - memset(info, 0, sizeof(d3d12_varying_info)); + info->max = 0; + info->mask = 0; nir_foreach_variable_with_modes(var, s, modes) { unsigned slot = var->data.location; @@ -563,6 +564,11 @@ fill_varyings(struct d3d12_varying_info *info, nir_shader *s, if (!(mask & slot_bit)) continue; + if ((info->mask & slot_bit) == 0) { + memset(info->slots + slot, 0, sizeof(info->slots[0])); + info->max = MAX2(info->max, slot); + } + const struct glsl_type *type = var->type; if ((s->info.stage == MESA_SHADER_GEOMETRY || s->info.stage == MESA_SHADER_TESS_CTRL) && @@ -579,6 +585,13 @@ fill_varyings(struct d3d12_varying_info *info, nir_shader *s, info->mask |= slot_bit; info->slots[slot].location_frac_mask |= (1 << var->data.location_frac); } + + for (uint32_t i = 0; i < info->max; ++i) { + if (((1llu << i) & info->mask) == 0) { + memset(info->slots + i, 0, sizeof(info->slots[0])); + } + } + } static void @@ -594,6 +607,32 @@ fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs) } } +bool +d3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have) +{ + if (expect->mask != have->mask + || expect->max != have->max) + return false; + + if (!expect->mask) + return true; + + /* 6 is a rough (wild) guess for a bulk memcmp cross-over point. When there + * are a small number of slots present, individual is much faster. */ + if (util_bitcount64(expect->mask) < 6) { + uint64_t mask = expect->mask; + while (mask) { + int slot = u_bit_scan64(&mask); + if (memcmp(&expect->slots[slot], &have->slots[slot], sizeof(have->slots[slot]))) + return false; + } + + return true; + } + + return !memcmp(expect->slots, have->slots, sizeof(expect->slots[0]) * expect->max); +} + static void validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx) { @@ -676,31 +715,6 @@ validate_tess_ctrl_shader_variant(struct d3d12_selection_context *sel_ctx) ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = tcs; } -static bool -d3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have) -{ - if (expect->mask != have->mask) - return false; - - if (!expect->mask) - return true; - - /* 6 is a rough (wild) guess for a bulk memcmp cross-over point. When there - * are a small number of slots present, individual memcmp is much faster. */ - if (util_bitcount64(expect->mask) < 6) { - uint64_t mask = expect->mask; - while (mask) { - int slot = u_bit_scan64(&mask); - if (memcmp(&expect->slots[slot], &have->slots[slot], sizeof(have->slots[slot]))) - return false; - } - - return true; - } - - return !memcmp(expect, have, sizeof(struct d3d12_varying_info)); -} - static bool d3d12_compare_shader_keys(struct d3d12_selection_context* sel_ctx, const d3d12_shader_key *expect, const d3d12_shader_key *have) { diff --git a/src/gallium/drivers/d3d12/d3d12_compiler.h b/src/gallium/drivers/d3d12/d3d12_compiler.h index 0ebc8cc642e..957170928dc 100644 --- a/src/gallium/drivers/d3d12/d3d12_compiler.h +++ b/src/gallium/drivers/d3d12/d3d12_compiler.h @@ -76,6 +76,7 @@ struct d3d12_varying_info { } vars[4]; } slots[VARYING_SLOT_MAX]; uint64_t mask; + uint32_t max; }; struct d3d12_image_format_conversion_info {
