Module: Mesa
Branch: main
Commit: dd3e9be413ad1c5a2c834451af7af0acccae4241
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=dd3e9be413ad1c5a2c834451af7af0acccae4241

Author: Georg Lehmann <[email protected]>
Date:   Tue Jul 26 19:55:36 2022 +0200

radeonsi: Stop using nir_legalize_16bit_sampler_srcs.

This pass is problematic because it truncates sources when there are type
mismatches. With the right nir_fold_16bit_tex_image options we can ensure
that there is never a mismatch.

Other changes: bias is affected by A16 https://reviews.llvm.org/D111754
And enable G16 as we recently confirmed that it's fully independent from A16.

Signed-off-by: Georg Lehmann <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
Reviewed-by: Emma Anholt <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17757>

---

 src/gallium/drivers/radeonsi/si_shader_nir.c | 63 +++++++++++++---------------
 1 file changed, 29 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 24631ea3ae9..6776bff33f5 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -155,53 +155,48 @@ void si_nir_late_opts(nir_shader *nir)
 
 static void si_late_optimize_16bit_samplers(struct si_screen *sscreen, 
nir_shader *nir)
 {
-   /* Optimize and fix types of image_sample sources and destinations.
+   /* Optimize types of image_sample sources and destinations.
     *
-    * The image_sample constraints are:
-    *   nir_tex_src_coord:       has_a16 ? select 16 or 32 : 32
+    * The image_sample sources bit sizes are:
+    *   nir_tex_src_coord:       a16 ? 16 : 32
     *   nir_tex_src_comparator:  32
     *   nir_tex_src_offset:      32
-    *   nir_tex_src_bias:        32
-    *   nir_tex_src_lod:         match coord
-    *   nir_tex_src_min_lod:     match coord
-    *   nir_tex_src_ms_index:    match coord
-    *   nir_tex_src_ddx:         has_g16 && coord == 32 ? select 16 or 32 : 
match coord
-    *   nir_tex_src_ddy:         match ddy
+    *   nir_tex_src_bias:        a16 ? 16 : 32
+    *   nir_tex_src_lod:         a16 ? 16 : 32
+    *   nir_tex_src_min_lod:     a16 ? 16 : 32
+    *   nir_tex_src_ms_index:    a16 ? 16 : 32
+    *   nir_tex_src_ddx:         has_g16 ? (g16 ? 16 : 32) : (a16 ? 16 : 32)
+    *   nir_tex_src_ddy:         has_g16 ? (g16 ? 16 : 32) : (a16 ? 16 : 32)
     *
-    * coord and ddx are selected optimally. The types of the rest are legalized
-    * based on those two.
+    * We only use a16/g16 if all of the affected sources are 16bit.
     */
-   /* TODO: The constraints can't represent the ddx constraint. */
-   /*bool has_g16 = sscreen->info.gfx_level >= GFX10 && LLVM_VERSION_MAJOR >= 
12;*/
-   bool has_g16 = false;
-   nir_tex_src_type_constraints tex_constraints = {
-      [nir_tex_src_comparator]   = {true, 32},
-      [nir_tex_src_offset]       = {true, 32},
-      [nir_tex_src_bias]         = {true, 32},
-      [nir_tex_src_lod]          = {true, 0, nir_tex_src_coord},
-      [nir_tex_src_min_lod]      = {true, 0, nir_tex_src_coord},
-      [nir_tex_src_ms_index]     = {true, 0, nir_tex_src_coord},
-      [nir_tex_src_ddx]          = {!has_g16, 0, nir_tex_src_coord},
-      [nir_tex_src_ddy]          = {true, 0, has_g16 ? nir_tex_src_ddx : 
nir_tex_src_coord},
-   };
-   bool changed = false;
-
-   struct nir_fold_tex_srcs_options fold_srcs_options = {
-      .sampler_dims = ~BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE),
-      .src_types = (1 << nir_tex_src_coord) |
-                   (has_g16 ? 1 << nir_tex_src_ddx : 0),
+   bool has_g16 = sscreen->info.gfx_level >= GFX10 && LLVM_VERSION_MAJOR >= 12;
+   struct nir_fold_tex_srcs_options fold_srcs_options[] = {
+      {
+         .sampler_dims =
+            ~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | 
BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)),
+         .src_types = (1 << nir_tex_src_coord) | (1 << nir_tex_src_lod) |
+                      (1 << nir_tex_src_bias) | (1 << nir_tex_src_min_lod) |
+                      (1 << nir_tex_src_ms_index) |
+                      (has_g16 ? 0 : (1 << nir_tex_src_ddx) | (1 << 
nir_tex_src_ddy)),
+         .only_fold_all = true,
+      },
+      {
+         .sampler_dims = ~BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE),
+         .src_types = (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy),
+         .only_fold_all = true,
+      },
    };
    struct nir_fold_16bit_tex_image_options fold_16bit_options = {
       .rounding_mode = nir_rounding_mode_rtne,
       .fold_tex_dest = true,
       .fold_image_load_store_data = true,
-      .fold_srcs_options_count = 1,
-      .fold_srcs_options = &fold_srcs_options,
+      .fold_srcs_options_count = has_g16 ? 2 : 1,
+      .fold_srcs_options = fold_srcs_options,
    };
+   bool changed = false;
    NIR_PASS(changed, nir, nir_fold_16bit_tex_image, &fold_16bit_options);
 
-   NIR_PASS(changed, nir, nir_legalize_16bit_sampler_srcs, tex_constraints);
-
    if (changed) {
       si_nir_opts(sscreen, nir, false);
       si_nir_late_opts(nir);

Reply via email to