Module: Mesa
Branch: main
Commit: 1a1c81987da573ec3e8e625f9312a7bd04998481
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1a1c81987da573ec3e8e625f9312a7bd04998481

Author: M Henning <dra...@darkrefraction.com>
Date:   Thu Nov 23 13:55:53 2023 -0500

nak: Call nir_opt_load_store_vectorize

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26347>

---

 src/nouveau/compiler/nak_nir.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index bbb355d1c38..74ed2c2c38a 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -965,6 +965,27 @@ nak_nir_lower_fs_outputs(nir_shader *nir)
    return true;
 }
 
+static bool
+nak_mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
+                     unsigned bit_size, unsigned num_components,
+                     nir_intrinsic_instr *low, nir_intrinsic_instr *high,
+                     void *cb_data)
+{
+   /* Returns true only if the access fits inside one aligned window, where
+    * the window is align_mul clamped to max_bytes. Since we legalize these
+    * later with nir_lower_mem_access_bit_sizes, we can optimistically
+    * combine anything that might be profitable. */
+   assert(util_is_power_of_two_nonzero(align_mul));
+
+   unsigned max_bytes = 128u / 8u; /* 16 bytes unless overridden below */
+   if (low->intrinsic == nir_intrinsic_load_ubo)
+      max_bytes = 64u / 8u; /* 8 bytes when low is a load_ubo */
+
+   align_mul = MIN2(align_mul, max_bytes);
+   align_offset = align_offset % align_mul; /* offset in clamped window */
+   return align_offset + num_components * (bit_size / 8) <= align_mul;
+}
+
 static nir_mem_access_size_align
 nak_mem_access_size_align(nir_intrinsic_op intrin,
                           uint8_t bytes, uint8_t bit_size,
@@ -1049,8 +1070,18 @@ nak_postprocess_nir(nir_shader *nir,
           glsl_get_natural_size_align_bytes);
       OPT(nir, nir_lower_explicit_io, nir_var_function_temp,
           nir_address_format_32bit_offset);
+      nak_optimize_nir(nir, nak);
    }
 
+   nir_load_store_vectorize_options vectorize_opts = {};
+   vectorize_opts.modes = nir_var_mem_global |
+                          nir_var_mem_ssbo |
+                          nir_var_mem_shared |
+                          nir_var_shader_temp;
+   vectorize_opts.callback = nak_mem_vectorize_cb;
+   vectorize_opts.robust_modes = robust2_modes;
+   OPT(nir, nir_opt_load_store_vectorize, &vectorize_opts);
+
    nir_lower_mem_access_bit_sizes_options mem_bit_size_options = {
       .modes = nir_var_mem_constant | nir_var_mem_ubo | nir_var_mem_generic,
       .callback = nak_mem_access_size_align,

Reply via email to