Module: Mesa
Branch: main
Commit: 822e756511784b791f191d152590215b915b17e2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=822e756511784b791f191d152590215b915b17e2

Author: Qiang Yu <[email protected]>
Date:   Sat Feb 11 19:11:08 2023 +0800

ac/llvm,radeonsi: lower fbfetch in abi

Reviewed-by: Marek Olšák <[email protected]>
Signed-off-by: Qiang Yu <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21436>

---

 src/amd/llvm/ac_llvm_build.c                      | 57 -----------------
 src/amd/llvm/ac_llvm_build.h                      |  3 -
 src/amd/llvm/ac_nir_to_llvm.c                     |  4 --
 src/amd/llvm/ac_shader_abi.h                      |  2 -
 src/gallium/drivers/radeonsi/si_nir_lower_abi.c   | 78 +++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_shader_internal.h |  1 -
 src/gallium/drivers/radeonsi/si_shader_llvm.c     |  2 -
 src/gallium/drivers/radeonsi/si_shader_llvm_ps.c  | 71 ---------------------
 8 files changed, 78 insertions(+), 140 deletions(-)

diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 179a1d5136f..a9668e3f4f4 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -3152,63 +3152,6 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, un
    return value;
 }
 
-/* Adjust the sample index according to FMASK.
- *
- * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
- * which is the identity mapping. Each nibble says which physical sample
- * should be fetched to get that sample.
- *
- * For example, 0x11111100 means there are only 2 samples stored and
- * the second sample covers 3/4 of the pixel. When reading samples 0
- * and 1, return physical sample 0 (determined by the first two 0s
- * in FMASK), otherwise return physical sample 1.
- *
- * The sample index should be adjusted as follows:
- *   addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF;
- */
-void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, 
LLVMValueRef *addr,
-                              bool is_array_tex)
-{
-   struct ac_image_args fmask_load = {0};
-   fmask_load.opcode = ac_image_load;
-   fmask_load.resource = fmask;
-   fmask_load.dmask = 0xf;
-   fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
-   fmask_load.attributes = AC_ATTR_INVARIANT_LOAD;
-
-   fmask_load.coords[0] = addr[0];
-   fmask_load.coords[1] = addr[1];
-   if (is_array_tex)
-      fmask_load.coords[2] = addr[2];
-   fmask_load.a16 = ac_get_elem_bits(ac, LLVMTypeOf(addr[0])) == 16;
-
-   LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
-   fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, ac->i32_0, 
"");
-
-   /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
-    * resource descriptor is 0 (invalid).
-    */
-   LLVMValueRef tmp;
-   tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, "");
-   tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, "");
-   tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, "");
-   fmask_value =
-      LLVMBuildSelect(ac->builder, tmp, fmask_value, LLVMConstInt(ac->i32, 
0x76543210, false), "");
-
-   /* Apply the formula. */
-   unsigned sample_chan = is_array_tex ? 3 : 2;
-   LLVMValueRef final_sample;
-   final_sample = LLVMBuildMul(ac->builder, addr[sample_chan],
-                               LLVMConstInt(LLVMTypeOf(addr[0]), 4, 0), "");
-   final_sample = LLVMBuildLShr(ac->builder, fmask_value,
-                                LLVMBuildZExt(ac->builder, final_sample, 
ac->i32, ""), "");
-   /* Mask the sample index by 0x7, because 0x8 means an unknown value
-    * with EQAA, so those will map to 0. */
-   addr[sample_chan] = LLVMBuildAnd(ac->builder, final_sample, 
LLVMConstInt(ac->i32, 0x7, 0), "");
-   if (fmask_load.a16)
-      addr[sample_chan] = LLVMBuildTrunc(ac->builder, final_sample, ac->i16, 
"");
-}
-
 static LLVMValueRef _ac_build_readlane(struct ac_llvm_context *ctx, 
LLVMValueRef src,
                                        LLVMValueRef lane, bool 
with_opt_barrier)
 {
diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h
index 49d89d60f09..e493433ac1d 100644
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@@ -502,9 +502,6 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, uns
 LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, 
unsigned rshift,
                              unsigned bitwidth);
 
-void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, 
LLVMValueRef *addr,
-                              bool is_array_tex);
-
 LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef 
src, unsigned mask);
 
 LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, 
LLVMValueRef src,
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index a83c5cb3ebe..af908670e40 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -3337,10 +3337,6 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
    /* No indirect indexing is allowed after this point. */
    assert(!indir_index);
 
-   if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
-       nir_intrinsic_io_semantics(instr).fb_fetch_output)
-      return ctx->abi->emit_fbfetch(ctx->abi);
-
    if (ctx->stage == MESA_SHADER_VERTEX && !is_output)
       return ctx->abi->load_inputs(ctx->abi, base, component, count, 0, 
component_type);
 
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h
index 9a570bba1b4..6a84e685127 100644
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -96,8 +96,6 @@ struct ac_shader_abi {
    LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi, LLVMValueRef 
index,
                                      enum ac_descriptor_type desc_type);
 
-   LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi);
-
    LLVMValueRef (*intrinsic_load)(struct ac_shader_abi *abi, 
nir_intrinsic_instr *intrin);
 
    /* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi 
currently
diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c
index 5bb0a8adc62..323329e737a 100644
--- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c
+++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c
@@ -107,6 +107,72 @@ static nir_ssa_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shade
    return nir_vec(b, comp, 4);
 }
 
+static nir_ssa_def *
+fetch_framebuffer(nir_builder *b, struct si_shader_args *args,
+                  struct si_shader_selector *sel, union si_shader_key *key)
+{
+   /* Load the image descriptor. */
+   STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
+   STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0_FMASK % 2 == 0);
+
+   nir_ssa_def *zero = nir_imm_zero(b, 1, 32);
+   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
+
+   unsigned chan = 0;
+   nir_ssa_def *vec[4] = {undef, undef, undef, undef};
+
+   vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->pos_fixed_pt, 0, 16);
+
+   if (!key->ps.mono.fbfetch_is_1D)
+      vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->pos_fixed_pt, 16, 
16);
+
+   /* Get the current render target layer index. */
+   if (key->ps.mono.fbfetch_layered)
+      vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary, 16, 
11);
+
+   nir_ssa_def *coords = nir_vec(b, vec, 4);
+
+   enum glsl_sampler_dim dim;
+   if (key->ps.mono.fbfetch_msaa)
+      dim = GLSL_SAMPLER_DIM_MS;
+   else if (key->ps.mono.fbfetch_is_1D)
+      dim = GLSL_SAMPLER_DIM_1D;
+   else
+      dim = GLSL_SAMPLER_DIM_2D;
+
+   nir_ssa_def *sample_id;
+   if (key->ps.mono.fbfetch_msaa) {
+      sample_id = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary, 8, 4);
+
+      if (sel->screen->info.gfx_level < GFX11 &&
+          !(sel->screen->debug_flags & DBG(NO_FMASK))) {
+         nir_ssa_def *desc =
+            load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0_FMASK, 8);
+
+         nir_ssa_def *fmask =
+            nir_bindless_image_fragment_mask_load_amd(
+               b, desc, coords,
+               .image_dim = dim,
+               .image_array = key->ps.mono.fbfetch_layered,
+               .access = ACCESS_CAN_REORDER);
+
+         nir_ssa_def *offset = nir_ishl_imm(b, sample_id, 2);
+         /* 3 for EQAA handling, see lower_image_to_fragment_mask_load() */
+         nir_ssa_def *width = nir_imm_int(b, 3);
+         sample_id = nir_ubfe(b, fmask, offset, width);
+      }
+   } else {
+      sample_id = zero;
+   }
+
+   nir_ssa_def *desc = load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0, 
8);
+
+   return nir_bindless_image_load(b, 4, 32, desc, coords, sample_id, zero,
+                                  .image_dim = dim,
+                                  .image_array = key->ps.mono.fbfetch_layered,
+                                  .access = ACCESS_CAN_REORDER);
+}
+
 static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct 
lower_abi_state *s)
 {
    if (instr->type != nir_instr_type_intrinsic)
@@ -355,6 +421,18 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s
       }
       break;
    }
+   case nir_intrinsic_load_output: {
+      nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
+
+      /* not fbfetch */
+      if (!(stage == MESA_SHADER_FRAGMENT && sem.fb_fetch_output))
+         return false;
+
+      /* Ignore src0, because KHR_blend_func_extended disallows multiple 
render targets. */
+
+      replacement = fetch_framebuffer(b, args, sel, key);
+      break;
+   }
    default:
       return false;
    }
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 98fa3c2d8af..961a20ee73c 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -228,7 +228,6 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
                              bool separate_epilog);
 void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct 
si_shader *shader);
 void si_llvm_ps_build_end(struct si_shader_context *ctx);
-void si_llvm_init_ps_callbacks(struct si_shader_context *ctx);
 
 /* si_shader_llvm_vs.c */
 void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union 
si_shader_part_key *key,
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 0c1147fb17a..8e40c0cacf0 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -830,8 +830,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
       break;
 
    case MESA_SHADER_FRAGMENT: {
-      si_llvm_init_ps_callbacks(ctx);
-
       unsigned colors_read = ctx->shader->selector->info.colors_read;
       LLVMValueRef main_fn = ctx->main_fn.value;
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c
index c43dc18ce44..41cb49a33d4 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c
@@ -26,72 +26,6 @@
 #include "si_shader_internal.h"
 #include "sid.h"
 
-static LLVMValueRef si_get_sample_id(struct si_shader_context *ctx)
-{
-   return si_unpack_param(ctx, ctx->args->ac.ancillary, 8, 4);
-}
-
-static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
-{
-   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-   struct ac_image_args args = {};
-   LLVMValueRef ptr, image, fmask;
-
-   /* Ignore src0, because KHR_blend_func_extended disallows multiple render
-    * targets.
-    */
-
-   /* Load the image descriptor. */
-   STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
-   STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0_FMASK % 2 == 0);
-
-   ptr = ac_get_arg(&ctx->ac, ctx->args->internal_bindings);
-   ptr =
-      LLVMBuildPointerCast(ctx->ac.builder, ptr, 
ac_array_in_const32_addr_space(ctx->ac.v8i32), "");
-   struct ac_llvm_pointer desc = { .v = ptr, .t = ctx->ac.v8i32 };
-
-   image = ac_build_load_to_sgpr(&ctx->ac, desc, LLVMConstInt(ctx->ac.i32, 
SI_PS_IMAGE_COLORBUF0 / 2, 0));
-
-   unsigned chan = 0;
-
-   args.coords[chan++] = si_unpack_param(ctx, ctx->args->pos_fixed_pt, 0, 16);
-
-   if (!ctx->shader->key.ps.mono.fbfetch_is_1D)
-      args.coords[chan++] = si_unpack_param(ctx, ctx->args->pos_fixed_pt, 16, 
16);
-
-   /* Get the current render target layer index. */
-   if (ctx->shader->key.ps.mono.fbfetch_layered)
-      args.coords[chan++] = si_unpack_param(ctx, ctx->args->ac.ancillary, 16, 
11);
-
-   if (ctx->shader->key.ps.mono.fbfetch_msaa)
-      args.coords[chan++] = si_get_sample_id(ctx);
-
-   if (ctx->screen->info.gfx_level < GFX11 &&
-       ctx->shader->key.ps.mono.fbfetch_msaa &&
-       !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
-
-      fmask = ac_build_load_to_sgpr(&ctx->ac, desc, LLVMConstInt(ctx->ac.i32, 
SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
-
-      ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
-                               ctx->shader->key.ps.mono.fbfetch_layered);
-   }
-
-   args.opcode = ac_image_load;
-   args.resource = image;
-   args.dmask = 0xf;
-   args.attributes = AC_ATTR_INVARIANT_LOAD;
-
-   if (ctx->shader->key.ps.mono.fbfetch_msaa)
-      args.dim =
-         ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darraymsaa : 
ac_image_2dmsaa;
-   else if (ctx->shader->key.ps.mono.fbfetch_is_1D)
-      args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_1darray : 
ac_image_1d;
-   else
-      args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darray : 
ac_image_2d;
-
-   return ac_build_image_opcode(&ctx->ac, &args);
-}
-
 static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx, unsigned 
attr_index,
                                        unsigned chan, LLVMValueRef prim_mask, 
LLVMValueRef i,
                                        LLVMValueRef j)
@@ -943,8 +877,3 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader
 
    si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, 
main_arg_types, false);
 }
-
-void si_llvm_init_ps_callbacks(struct si_shader_context *ctx)
-{
-   ctx->abi.emit_fbfetch = si_nir_emit_fbfetch;
-}

Reply via email to