Module: Mesa
Branch: main
Commit: a00b50d820b5a293ba6f05fee631f02bf86138f4
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a00b50d820b5a293ba6f05fee631f02bf86138f4

Author: Georg Lehmann <[email protected]>
Date:   Sat Feb 18 13:37:48 2023 +0100

nir: change 16bit image dest folding option to per type

Reviewed-by: Marek Olšák <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21404>

---

 src/amd/vulkan/radv_pipeline.c               |  3 ++-
 src/compiler/nir/nir.h                       |  3 ++-
 src/compiler/nir/nir_lower_mediump.c         | 15 ++++++++++-----
 src/freedreno/ir3/ir3_nir.c                  |  4 +++-
 src/gallium/drivers/radeonsi/si_shader_nir.c |  3 ++-
 5 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 3e2dc6fde0f..7d8789c343d 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3297,7 +3297,8 @@ radv_postprocess_nir(struct radv_pipeline *pipeline,
       struct nir_fold_16bit_tex_image_options fold_16bit_options = {
          .rounding_mode = nir_rounding_mode_rtne,
          .fold_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
-         .fold_image_load_store_data = true,
+         .fold_image_dest_types = nir_type_float | nir_type_uint | nir_type_int,
+         .fold_image_store_data = true,
          .fold_image_srcs = !radv_use_llvm_for_stage(device, stage->stage),
          .fold_srcs_options_count = separate_g16 ? 2 : 1,
          .fold_srcs_options = fold_srcs_options,
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 621b8363cad..db0a536caa4 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5566,7 +5566,8 @@ struct nir_fold_tex_srcs_options {
 struct nir_fold_16bit_tex_image_options {
    nir_rounding_mode rounding_mode;
    nir_alu_type fold_tex_dest_types;
-   bool fold_image_load_store_data;
+   nir_alu_type fold_image_dest_types;
+   bool fold_image_store_data;
    bool fold_image_srcs;
    unsigned fold_srcs_options_count;
    struct nir_fold_tex_srcs_options *fold_srcs_options;
diff --git a/src/compiler/nir/nir_lower_mediump.c b/src/compiler/nir/nir_lower_mediump.c
index c5597f8086d..c1baabf27ed 100644
--- a/src/compiler/nir/nir_lower_mediump.c
+++ b/src/compiler/nir/nir_lower_mediump.c
@@ -886,11 +886,14 @@ fold_16bit_destination(nir_ssa_def *ssa, nir_alu_type dest_type,
 }
 
 static bool
-fold_16bit_load_data(nir_builder *b, nir_intrinsic_instr *instr,
-                     unsigned exec_mode, nir_rounding_mode rdm)
+fold_16bit_image_dest(nir_intrinsic_instr *instr, unsigned exec_mode,
+                      nir_alu_type allowed_types, nir_rounding_mode rdm)
 {
    nir_alu_type dest_type = nir_intrinsic_dest_type(instr);
 
+   if (!(nir_alu_type_get_base_type(dest_type) & allowed_types))
+      return false;
+
    if (!fold_16bit_destination(&instr->dest.ssa, dest_type, exec_mode, rdm))
       return false;
 
@@ -1016,7 +1019,7 @@ fold_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
       case nir_intrinsic_bindless_image_store:
       case nir_intrinsic_image_deref_store:
       case nir_intrinsic_image_store:
-         if (options->fold_image_load_store_data)
+         if (options->fold_image_store_data)
             progress |= fold_16bit_store_data(b, intrinsic);
          if (options->fold_image_srcs)
             progress |= fold_16bit_image_srcs(b, intrinsic, 4);
@@ -1024,8 +1027,10 @@ fold_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
       case nir_intrinsic_bindless_image_load:
       case nir_intrinsic_image_deref_load:
       case nir_intrinsic_image_load:
-         if (options->fold_image_load_store_data)
-            progress |= fold_16bit_load_data(b, intrinsic, exec_mode, options->rounding_mode);
+         if (options->fold_image_dest_types)
+            progress |= fold_16bit_image_dest(intrinsic, exec_mode,
+                                              options->fold_image_dest_types,
+                                              options->rounding_mode);
          if (options->fold_image_srcs)
             progress |= fold_16bit_image_srcs(b, intrinsic, 3);
          break;
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 388017eb9be..59389bf5247 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -783,7 +783,9 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
             .rounding_mode = nir_rounding_mode_rtz,
             .fold_tex_dest_types = nir_type_float,
            /* blob dumps have no half regs on pixel 2's ldib or stib, so only enable for a6xx+. */
-            .fold_image_load_store_data = so->compiler->gen >= 6,
+            .fold_image_dest_types = so->compiler->gen >= 6 ?
+                                        nir_type_float | nir_type_uint | nir_type_int : 0,
+            .fold_image_store_data = so->compiler->gen >= 6,
             .fold_srcs_options_count = 1,
             .fold_srcs_options = &fold_srcs_options,
          };
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index f2d588a2fe0..12e7cfdc553 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -198,7 +198,8 @@ static void si_late_optimize_16bit_samplers(struct si_screen *sscreen, nir_shade
    struct nir_fold_16bit_tex_image_options fold_16bit_options = {
       .rounding_mode = nir_rounding_mode_rtne,
       .fold_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
-      .fold_image_load_store_data = true,
+      .fold_image_dest_types = nir_type_float | nir_type_uint | nir_type_int,
+      .fold_image_store_data = true,
       .fold_srcs_options_count = has_g16 ? 2 : 1,
       .fold_srcs_options = fold_srcs_options,
    };

Reply via email to