Module: Mesa
Branch: main
Commit: 75b75c6c0a92acf771e7407cc03c4687b8605cd6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=75b75c6c0a92acf771e7407cc03c4687b8605cd6

Author: Qiang Yu <[email protected]>
Date:   Thu Apr 13 21:00:34 2023 +0800

ac/llvm,radeonsi: use texture non-uniform flag as waterfall switch

This is also for calling nir_lower_non_uniform_access() when using ACO.

Reviewed-by: Marek Olšák <[email protected]>
Signed-off-by: Qiang Yu <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22523>

---

 src/amd/llvm/ac_nir_to_llvm.c                      | 26 ++++------------------
 .../drivers/radeonsi/si_nir_lower_resource.c       |  6 ++---
 src/gallium/drivers/radeonsi/si_shader.c           |  2 --
 src/gallium/drivers/radeonsi/si_shader_llvm.c      |  1 -
 src/gallium/drivers/radeonsi/si_shader_nir.c       |  7 ++++++
 5 files changed, 13 insertions(+), 29 deletions(-)

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 9739e3a0c2d..4c538e8c165 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -4399,8 +4399,6 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
                            struct waterfall_context *wctx, LLVMValueRef *res_ptr,
                            LLVMValueRef *samp_ptr)
 {
-   bool texture_handle_divergent = false;
-   bool sampler_handle_divergent = false;
    LLVMValueRef texture_dynamic_handle = NULL;
    LLVMValueRef sampler_dynamic_handle = NULL;
    int plane = -1;
@@ -4418,14 +4416,10 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
             else
                *samp_ptr = val;
          } else {
-            bool divergent = instr->src[i].src.ssa->divergent;
-            if (instr->src[i].src_type == nir_tex_src_texture_handle) {
+            if (instr->src[i].src_type == nir_tex_src_texture_handle)
                texture_dynamic_handle = val;
-               texture_handle_divergent = divergent;
-            } else {
+            else
                sampler_dynamic_handle = val;
-               sampler_handle_divergent = divergent;
-            }
          }
          break;
       }
@@ -4455,23 +4449,11 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
       main_descriptor = AC_DESC_FMASK;
    }
 
-   /* instr->sampler_non_uniform and texture_non_uniform are always false in GLSL,
-    * but this can lead to unexpected behavior if texture/sampler index come from
-    * a vertex attribute.
-    * For instance, 2 consecutive draws using 2 different index values,
-    * could be squashed together by the hw - producing a single draw with
-    * non-dynamically uniform index.
-    * To avoid this, detect divergent indexing, and use enter_waterfall.
-    * See https://gitlab.freedesktop.org/mesa/mesa/-/issues/2253.
-    */
-
    /* descriptor handles given through nir_tex_src_{texture,sampler}_handle */
-   if (instr->texture_non_uniform ||
-       (ctx->abi->use_waterfall_for_divergent_tex_samplers && texture_handle_divergent))
+   if (instr->texture_non_uniform)
       texture_dynamic_handle = enter_waterfall(ctx, &wctx[0], texture_dynamic_handle, true);
 
-   if (instr->sampler_non_uniform ||
-       (ctx->abi->use_waterfall_for_divergent_tex_samplers && sampler_handle_divergent))
+   if (instr->sampler_non_uniform)
       sampler_dynamic_handle = enter_waterfall(ctx, &wctx[1], sampler_dynamic_handle, true);
 
    if (texture_dynamic_handle)
diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_resource.c b/src/gallium/drivers/radeonsi/si_nir_lower_resource.c
index 1a1f7988ea7..26962f22337 100644
--- a/src/gallium/drivers/radeonsi/si_nir_lower_resource.c
+++ b/src/gallium/drivers/radeonsi/si_nir_lower_resource.c
@@ -499,8 +499,6 @@ static nir_ssa_def *load_bindless_sampler_desc(nir_builder *b, nir_ssa_def *inde
 static bool lower_resource_tex(nir_builder *b, nir_tex_instr *tex,
                                struct lower_resource_state *s)
 {
-   assert(!tex->texture_non_uniform && !tex->sampler_non_uniform);
-
    nir_deref_instr *texture_deref = NULL;
    nir_deref_instr *sampler_deref = NULL;
    nir_ssa_def *texture_handle = NULL;
@@ -554,12 +552,12 @@ static bool lower_resource_tex(nir_builder *b, nir_tex_instr *tex,
    }
 
    nir_ssa_def *image = texture_deref ?
-      load_deref_sampler_desc(b, texture_deref, desc_type, s, false) :
+      load_deref_sampler_desc(b, texture_deref, desc_type, s, !tex->texture_non_uniform) :
       load_bindless_sampler_desc(b, texture_handle, desc_type, s);
 
    nir_ssa_def *sampler = NULL;
    if (sampler_deref)
-      sampler = load_deref_sampler_desc(b, sampler_deref, AC_DESC_SAMPLER, s, false);
+      sampler = load_deref_sampler_desc(b, sampler_deref, AC_DESC_SAMPLER, s, !tex->sampler_non_uniform);
    else if (sampler_handle)
       sampler = load_bindless_sampler_desc(b, sampler_handle, AC_DESC_SAMPLER, s);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index f83636e4ced..2e1a3d83f75 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2200,8 +2200,6 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
    if (progress || progress2 || opt_offsets)
       si_nir_late_opts(nir);
 
-   NIR_PASS_V(nir, nir_divergence_analysis);
-
    /* This helps LLVM form VMEM clauses and thus get more GPU cache hits.
     * 200 is tuned for Viewperf. It should be done last.
     */
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 3f6acf85ba2..1955112bc78 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -975,7 +975,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
    ctx->abi.load_grid_size_from_user_sgpr = true;
    ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero ||
                                 info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO;
-   ctx->abi.use_waterfall_for_divergent_tex_samplers = true;
    ctx->abi.disable_aniso_single_level = true;
    ctx->abi.conformant_trunc_coord = ctx->screen->info.conformant_trunc_coord;
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 1b735d970d9..e5424c459ee 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -460,5 +460,12 @@ char *si_finalize_nir(struct pipe_screen *screen, void *nirptr)
    NIR_PASS_V(nir, nir_convert_to_lcssa, true, true); /* required by divergence analysis */
    NIR_PASS_V(nir, nir_divergence_analysis); /* to find divergent loops */
 
+   /* Must be after divergence analysis. */
+   bool divergence_changed = false;
+   NIR_PASS(divergence_changed, nir, si_mark_divergent_texture_non_uniform);
+   /* Re-analyze the whole shader if texture instruction divergence changed. */
+   if (divergence_changed)
+      NIR_PASS_V(nir, nir_divergence_analysis);
+
    return NULL;
 }

Reply via email to