Module: Mesa Branch: main Commit: 75b75c6c0a92acf771e7407cc03c4687b8605cd6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=75b75c6c0a92acf771e7407cc03c4687b8605cd6
Author: Qiang Yu <[email protected]> Date: Thu Apr 13 21:00:34 2023 +0800 ac/llvm,radeonsi: use texture non-uniform flag as waterfall switch Also for calling nir_lower_non_uniform_access() when ACO. Reviewed-by: Marek Olšák <[email protected]> Signed-off-by: Qiang Yu <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22523> --- src/amd/llvm/ac_nir_to_llvm.c | 26 ++++------------------ .../drivers/radeonsi/si_nir_lower_resource.c | 6 ++--- src/gallium/drivers/radeonsi/si_shader.c | 2 -- src/gallium/drivers/radeonsi/si_shader_llvm.c | 1 - src/gallium/drivers/radeonsi/si_shader_nir.c | 7 ++++++ 5 files changed, 13 insertions(+), 29 deletions(-) diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 9739e3a0c2d..4c538e8c165 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -4399,8 +4399,6 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr, struct waterfall_context *wctx, LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr) { - bool texture_handle_divergent = false; - bool sampler_handle_divergent = false; LLVMValueRef texture_dynamic_handle = NULL; LLVMValueRef sampler_dynamic_handle = NULL; int plane = -1; @@ -4418,14 +4416,10 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr, else *samp_ptr = val; } else { - bool divergent = instr->src[i].src.ssa->divergent; - if (instr->src[i].src_type == nir_tex_src_texture_handle) { + if (instr->src[i].src_type == nir_tex_src_texture_handle) texture_dynamic_handle = val; - texture_handle_divergent = divergent; - } else { + else sampler_dynamic_handle = val; - sampler_handle_divergent = divergent; - } } break; } @@ -4455,23 +4449,11 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr, main_descriptor = AC_DESC_FMASK; } - /* instr->sampler_non_uniform and texture_non_uniform are always false in GLSL, - * but this can lead to unexpected behavior if texture/sampler 
index come from - * a vertex attribute. - * For instance, 2 consecutive draws using 2 different index values, - * could be squashed together by the hw - producing a single draw with - * non-dynamically uniform index. - * To avoid this, detect divergent indexing, and use enter_waterfall. - * See https://gitlab.freedesktop.org/mesa/mesa/-/issues/2253. - */ - /* descriptor handles given through nir_tex_src_{texture,sampler}_handle */ - if (instr->texture_non_uniform || - (ctx->abi->use_waterfall_for_divergent_tex_samplers && texture_handle_divergent)) + if (instr->texture_non_uniform) texture_dynamic_handle = enter_waterfall(ctx, &wctx[0], texture_dynamic_handle, true); - if (instr->sampler_non_uniform || - (ctx->abi->use_waterfall_for_divergent_tex_samplers && sampler_handle_divergent)) + if (instr->sampler_non_uniform) sampler_dynamic_handle = enter_waterfall(ctx, &wctx[1], sampler_dynamic_handle, true); if (texture_dynamic_handle) diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_resource.c b/src/gallium/drivers/radeonsi/si_nir_lower_resource.c index 1a1f7988ea7..26962f22337 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_resource.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_resource.c @@ -499,8 +499,6 @@ static nir_ssa_def *load_bindless_sampler_desc(nir_builder *b, nir_ssa_def *inde static bool lower_resource_tex(nir_builder *b, nir_tex_instr *tex, struct lower_resource_state *s) { - assert(!tex->texture_non_uniform && !tex->sampler_non_uniform); - nir_deref_instr *texture_deref = NULL; nir_deref_instr *sampler_deref = NULL; nir_ssa_def *texture_handle = NULL; @@ -554,12 +552,12 @@ static bool lower_resource_tex(nir_builder *b, nir_tex_instr *tex, } nir_ssa_def *image = texture_deref ? 
- load_deref_sampler_desc(b, texture_deref, desc_type, s, false) : + load_deref_sampler_desc(b, texture_deref, desc_type, s, !tex->texture_non_uniform) : load_bindless_sampler_desc(b, texture_handle, desc_type, s); nir_ssa_def *sampler = NULL; if (sampler_deref) - sampler = load_deref_sampler_desc(b, sampler_deref, AC_DESC_SAMPLER, s, false); + sampler = load_deref_sampler_desc(b, sampler_deref, AC_DESC_SAMPLER, s, !tex->sampler_non_uniform); else if (sampler_handle) sampler = load_bindless_sampler_desc(b, sampler_handle, AC_DESC_SAMPLER, s); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f83636e4ced..2e1a3d83f75 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2200,8 +2200,6 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, if (progress || progress2 || opt_offsets) si_nir_late_opts(nir); - NIR_PASS_V(nir, nir_divergence_analysis); - /* This helps LLVM form VMEM clauses and thus get more GPU cache hits. * 200 is tuned for Viewperf. It should be done last. 
*/ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 3f6acf85ba2..1955112bc78 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -975,7 +975,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade ctx->abi.load_grid_size_from_user_sgpr = true; ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero || info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO; - ctx->abi.use_waterfall_for_divergent_tex_samplers = true; ctx->abi.disable_aniso_single_level = true; ctx->abi.conformant_trunc_coord = ctx->screen->info.conformant_trunc_coord; diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 1b735d970d9..e5424c459ee 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -460,5 +460,12 @@ char *si_finalize_nir(struct pipe_screen *screen, void *nirptr) NIR_PASS_V(nir, nir_convert_to_lcssa, true, true); /* required by divergence analysis */ NIR_PASS_V(nir, nir_divergence_analysis); /* to find divergent loops */ + /* Must be after divergence analysis. */ + bool divergence_changed = false; + NIR_PASS(divergence_changed, nir, si_mark_divergent_texture_non_uniform); + /* Re-analyze the whole shader if any texture instruction's divergence changed. */ + if (divergence_changed) + NIR_PASS_V(nir, nir_divergence_analysis); + return NULL; }
