Mesa (main): radeonsi: only vectorize nir ops that aco support

GitLab Mirror Sun, 19 Nov 2023 18:58:19 -0800

Module: Mesa
Branch: main
Commit: 909895ae2a377758812e3ac3098e97e21f952393
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=909895ae2a377758812e3ac3098e97e21f952393


Author: Qiang Yu <yuq...@gmail.com>
Date:   Wed Nov  1 17:29:11 2023 +0800

radeonsi: only vectorize nir ops that aco support

This fixes ACO compilation of the NIR code created by si_compute_blit.
Two 16-bit vector ops are used in it:
  con 16x2  %11 = u2u16 %10.xy
  con 16x2  %25 = f2f16 %22.xy
which are not supported by ACO yet.

P.S. ACO now supports vec2 f2f16.

Reviewed-by: Marek Olšák <marek.ol...@amd.com>
Signed-off-by: Qiang Yu <yuq...@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25990>

---

 src/gallium/drivers/radeonsi/si_shader_nir.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index c85f7b6af8b..7f8219cf8db 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -8,15 +8,17 @@
 #include "nir_xfb_info.h"
 #include "si_pipe.h"
 #include "ac_nir.h"
+#include "aco_interface.h"
 
 
 bool si_alu_to_scalar_packed_math_filter(const nir_instr *instr, const void 
*data)
 {
    if (instr->type == nir_instr_type_alu) {
       nir_alu_instr *alu = nir_instr_as_alu(instr);
+      bool use_aco = (bool)data;
 
-      if (alu->def.bit_size == 16 &&
-          alu->def.num_components == 2)
+      if (alu->def.bit_size == 16 && alu->def.num_components == 2 &&
+          (!use_aco || aco_nir_op_supports_packed_math_16bit(alu)))
          return false;
    }
 
@@ -29,7 +31,14 @@ static uint8_t si_vectorize_callback(const nir_instr *instr, 
const void *data)
       return 0;
 
    nir_alu_instr *alu = nir_instr_as_alu(instr);
-   if (alu->def.bit_size == 16) {
+   if (alu->def.bit_size != 16)
+      return 1;
+
+   bool use_aco = (bool)data;
+
+   if (use_aco) {
+      return aco_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
+   } else {
       switch (alu->op) {
       case nir_op_unpack_32_2x16_split_x:
       case nir_op_unpack_32_2x16_split_y:
@@ -38,8 +47,6 @@ static uint8_t si_vectorize_callback(const nir_instr *instr, 
const void *data)
          return 2;
       }
    }
-
-   return 1;
 }
 
 static unsigned si_lower_bit_size_callback(const nir_instr *instr, void *data)
@@ -73,7 +80,7 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader 
*nir, bool first)
 
       NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
       NIR_PASS(progress, nir, nir_lower_alu_to_scalar,
-               nir->options->lower_to_scalar_filter, NULL);
+               nir->options->lower_to_scalar_filter, (void *)sscreen->use_aco);
       NIR_PASS(progress, nir, nir_lower_phis_to_scalar, false);
 
       if (first) {
@@ -97,7 +104,7 @@ void si_nir_opts(struct si_screen *sscreen, struct 
nir_shader *nir, bool first)
 
       if (lower_alu_to_scalar) {
          NIR_PASS_V(nir, nir_lower_alu_to_scalar,
-                    nir->options->lower_to_scalar_filter, NULL);
+                    nir->options->lower_to_scalar_filter, (void 
*)sscreen->use_aco);
       }
       if (lower_phis_to_scalar)
          NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
@@ -139,8 +146,10 @@ void si_nir_opts(struct si_screen *sscreen, struct 
nir_shader *nir, bool first)
       if (nir->info.stage == MESA_SHADER_FRAGMENT)
          NIR_PASS_V(nir, nir_opt_move_discards_to_top);
 
-      if (sscreen->info.has_packed_math_16bit)
-         NIR_PASS(progress, nir, nir_opt_vectorize, si_vectorize_callback, 
NULL);
+      if (sscreen->info.has_packed_math_16bit) {
+         NIR_PASS(progress, nir, nir_opt_vectorize, si_vectorize_callback,
+                  (void *)sscreen->use_aco);
+      }
    } while (progress);
 
    NIR_PASS_V(nir, nir_lower_var_copies);

Mesa (main): radeonsi: only vectorize nir ops that aco support

Reply via email to