Module: Mesa
Branch: main
Commit: c8ad1aeeb27df4a4781c4a26193059fdb7fd52b6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c8ad1aeeb27df4a4781c4a26193059fdb7fd52b6

Author: Georg Lehmann <[email protected]>
Date:   Thu Aug 11 12:11:04 2022 +0200

nir/fold_16bit_tex_image: Add an option to fold image sources.

Signed-off-by: Georg Lehmann <[email protected]>
Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18106>

---

 src/compiler/nir/nir.h               |  1 +
 src/compiler/nir/nir_lower_mediump.c | 82 ++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 26ba6acd111..8fb1b30581b 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5360,6 +5360,7 @@ struct nir_fold_16bit_tex_image_options {
    nir_rounding_mode rounding_mode;
    bool fold_tex_dest;
    bool fold_image_load_store_data;
+   bool fold_image_srcs;
    unsigned fold_srcs_options_count;
    struct nir_fold_tex_srcs_options *fold_srcs_options;
 };
diff --git a/src/compiler/nir/nir_lower_mediump.c b/src/compiler/nir/nir_lower_mediump.c
index bd6d270471f..4b849c09846 100644
--- a/src/compiler/nir/nir_lower_mediump.c
+++ b/src/compiler/nir/nir_lower_mediump.c
@@ -722,6 +722,30 @@ fold_16bit_tex_srcs(nir_builder *b, nir_tex_instr *tex,
    return !!fold_srcs;
 }
 
+static bool
+fold_16bit_image_srcs(nir_builder *b, nir_intrinsic_instr *instr, int lod_idx)
+{
+   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
+   bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
+   nir_src *coords = &instr->src[1];
+   nir_src *sample = is_ms ? &instr->src[2] : NULL;
+   nir_src *lod = lod_idx >= 0 ? &instr->src[lod_idx] : NULL;
+
+   if (dim == GLSL_SAMPLER_DIM_BUF ||
+       !can_fold_16bit_src(coords->ssa, nir_type_int32, false) ||
+       (sample && !can_fold_16bit_src(sample->ssa, nir_type_int32, false)) ||
+       (lod && !can_fold_16bit_src(lod->ssa, nir_type_int32, false)))
+      return false;
+
+   fold_16bit_src(b, &instr->instr, coords, nir_type_int32);
+   if (sample)
+      fold_16bit_src(b, &instr->instr, sample, nir_type_int32);
+   if (lod)
+      fold_16bit_src(b, &instr->instr, lod, nir_type_int32);
+
+   return true;
+}
+
 static bool
 fold_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
 {
@@ -738,12 +762,70 @@ fold_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
       case nir_intrinsic_image_store:
          if (options->fold_image_load_store_data)
             progress |= fold_16bit_store_data(b, intrinsic);
+         if (options->fold_image_srcs)
+            progress |= fold_16bit_image_srcs(b, intrinsic, 4);
          break;
       case nir_intrinsic_bindless_image_load:
       case nir_intrinsic_image_deref_load:
       case nir_intrinsic_image_load:
          if (options->fold_image_load_store_data)
            progress |= fold_16bit_load_data(b, intrinsic, exec_mode, options->rounding_mode);
+         if (options->fold_image_srcs)
+            progress |= fold_16bit_image_srcs(b, intrinsic, 3);
+         break;
+      case nir_intrinsic_bindless_image_sparse_load:
+      case nir_intrinsic_image_deref_sparse_load:
+      case nir_intrinsic_image_sparse_load:
+         if (options->fold_image_srcs)
+            progress |= fold_16bit_image_srcs(b, intrinsic, 3);
+         break;
+      case nir_intrinsic_bindless_image_atomic_add:
+      case nir_intrinsic_bindless_image_atomic_imin:
+      case nir_intrinsic_bindless_image_atomic_umin:
+      case nir_intrinsic_bindless_image_atomic_imax:
+      case nir_intrinsic_bindless_image_atomic_umax:
+      case nir_intrinsic_bindless_image_atomic_and:
+      case nir_intrinsic_bindless_image_atomic_or:
+      case nir_intrinsic_bindless_image_atomic_xor:
+      case nir_intrinsic_bindless_image_atomic_exchange:
+      case nir_intrinsic_bindless_image_atomic_comp_swap:
+      case nir_intrinsic_bindless_image_atomic_fadd:
+      case nir_intrinsic_bindless_image_atomic_fmin:
+      case nir_intrinsic_bindless_image_atomic_fmax:
+      case nir_intrinsic_bindless_image_atomic_inc_wrap:
+      case nir_intrinsic_bindless_image_atomic_dec_wrap:
+      case nir_intrinsic_image_deref_atomic_add:
+      case nir_intrinsic_image_deref_atomic_umin:
+      case nir_intrinsic_image_deref_atomic_imin:
+      case nir_intrinsic_image_deref_atomic_umax:
+      case nir_intrinsic_image_deref_atomic_imax:
+      case nir_intrinsic_image_deref_atomic_and:
+      case nir_intrinsic_image_deref_atomic_or:
+      case nir_intrinsic_image_deref_atomic_xor:
+      case nir_intrinsic_image_deref_atomic_exchange:
+      case nir_intrinsic_image_deref_atomic_comp_swap:
+      case nir_intrinsic_image_deref_atomic_fadd:
+      case nir_intrinsic_image_deref_atomic_fmin:
+      case nir_intrinsic_image_deref_atomic_fmax:
+      case nir_intrinsic_image_deref_atomic_inc_wrap:
+      case nir_intrinsic_image_deref_atomic_dec_wrap:
+      case nir_intrinsic_image_atomic_add:
+      case nir_intrinsic_image_atomic_imin:
+      case nir_intrinsic_image_atomic_umin:
+      case nir_intrinsic_image_atomic_imax:
+      case nir_intrinsic_image_atomic_umax:
+      case nir_intrinsic_image_atomic_and:
+      case nir_intrinsic_image_atomic_or:
+      case nir_intrinsic_image_atomic_xor:
+      case nir_intrinsic_image_atomic_exchange:
+      case nir_intrinsic_image_atomic_comp_swap:
+      case nir_intrinsic_image_atomic_fadd:
+      case nir_intrinsic_image_atomic_fmin:
+      case nir_intrinsic_image_atomic_fmax:
+      case nir_intrinsic_image_atomic_inc_wrap:
+      case nir_intrinsic_image_atomic_dec_wrap:
+         if (options->fold_image_srcs)
+            progress |= fold_16bit_image_srcs(b, intrinsic, -1);
          break;
       default:
          break;

Reply via email to