Module: Mesa
Branch: main
Commit: 822e756511784b791f191d152590215b915b17e2
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=822e756511784b791f191d152590215b915b17e2
Author: Qiang Yu <[email protected]> Date: Sat Feb 11 19:11:08 2023 +0800 ac/llvm,radeonsi: lower fbfetch in abi Reviewed-by: Marek Olšák <[email protected]> Signed-off-by: Qiang Yu <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21436> --- src/amd/llvm/ac_llvm_build.c | 57 ----------------- src/amd/llvm/ac_llvm_build.h | 3 - src/amd/llvm/ac_nir_to_llvm.c | 4 -- src/amd/llvm/ac_shader_abi.h | 2 - src/gallium/drivers/radeonsi/si_nir_lower_abi.c | 78 +++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_shader_internal.h | 1 - src/gallium/drivers/radeonsi/si_shader_llvm.c | 2 - src/gallium/drivers/radeonsi/si_shader_llvm_ps.c | 71 --------------------- 8 files changed, 78 insertions(+), 140 deletions(-) diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 179a1d5136f..a9668e3f4f4 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -3152,63 +3152,6 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, un return value; } -/* Adjust the sample index according to FMASK. - * - * For uncompressed MSAA surfaces, FMASK should return 0x76543210, - * which is the identity mapping. Each nibble says which physical sample - * should be fetched to get that sample. - * - * For example, 0x11111100 means there are only 2 samples stored and - * the second sample covers 3/4 of the pixel. When reading samples 0 - * and 1, return physical sample 0 (determined by the first two 0s - * in FMASK), otherwise return physical sample 1. - * - * The sample index should be adjusted as follows: - * addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF; - */ -void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr, - bool is_array_tex) -{ - struct ac_image_args fmask_load = {0}; - fmask_load.opcode = ac_image_load; - fmask_load.resource = fmask; - fmask_load.dmask = 0xf; - fmask_load.dim = is_array_tex ? 
ac_image_2darray : ac_image_2d; - fmask_load.attributes = AC_ATTR_INVARIANT_LOAD; - - fmask_load.coords[0] = addr[0]; - fmask_load.coords[1] = addr[1]; - if (is_array_tex) - fmask_load.coords[2] = addr[2]; - fmask_load.a16 = ac_get_elem_bits(ac, LLVMTypeOf(addr[0])) == 16; - - LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load); - fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, ac->i32_0, ""); - - /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK - * resource descriptor is 0 (invalid). - */ - LLVMValueRef tmp; - tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, ""); - tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, ""); - tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, ""); - fmask_value = - LLVMBuildSelect(ac->builder, tmp, fmask_value, LLVMConstInt(ac->i32, 0x76543210, false), ""); - - /* Apply the formula. */ - unsigned sample_chan = is_array_tex ? 3 : 2; - LLVMValueRef final_sample; - final_sample = LLVMBuildMul(ac->builder, addr[sample_chan], - LLVMConstInt(LLVMTypeOf(addr[0]), 4, 0), ""); - final_sample = LLVMBuildLShr(ac->builder, fmask_value, - LLVMBuildZExt(ac->builder, final_sample, ac->i32, ""), ""); - /* Mask the sample index by 0x7, because 0x8 means an unknown value - * with EQAA, so those will map to 0. 
*/ - addr[sample_chan] = LLVMBuildAnd(ac->builder, final_sample, LLVMConstInt(ac->i32, 0x7, 0), ""); - if (fmask_load.a16) - addr[sample_chan] = LLVMBuildTrunc(ac->builder, final_sample, ac->i16, ""); -} - static LLVMValueRef _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane, bool with_opt_barrier) { diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index 49d89d60f09..e493433ac1d 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -502,9 +502,6 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, uns LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, unsigned rshift, unsigned bitwidth); -void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr, - bool is_array_tex); - LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask); LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, LLVMValueRef src, diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index a83c5cb3ebe..af908670e40 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3337,10 +3337,6 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr * /* No indirect indexing is allowed after this point. 
*/ assert(!indir_index); - if (ctx->stage == MESA_SHADER_FRAGMENT && is_output && - nir_intrinsic_io_semantics(instr).fb_fetch_output) - return ctx->abi->emit_fbfetch(ctx->abi); - if (ctx->stage == MESA_SHADER_VERTEX && !is_output) return ctx->abi->load_inputs(ctx->abi, base, component, count, 0, component_type); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index 9a570bba1b4..6a84e685127 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -96,8 +96,6 @@ struct ac_shader_abi { LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi, LLVMValueRef index, enum ac_descriptor_type desc_type); - LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi); - LLVMValueRef (*intrinsic_load)(struct ac_shader_abi *abi, nir_intrinsic_instr *intrin); /* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi currently diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 5bb0a8adc62..323329e737a 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -107,6 +107,72 @@ static nir_ssa_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shade return nir_vec(b, comp, 4); } +static nir_ssa_def * +fetch_framebuffer(nir_builder *b, struct si_shader_args *args, + struct si_shader_selector *sel, union si_shader_key *key) +{ + /* Load the image descriptor. */ + STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0); + STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0_FMASK % 2 == 0); + + nir_ssa_def *zero = nir_imm_zero(b, 1, 32); + nir_ssa_def *undef = nir_ssa_undef(b, 1, 32); + + unsigned chan = 0; + nir_ssa_def *vec[4] = {undef, undef, undef, undef}; + + vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->pos_fixed_pt, 0, 16); + + if (!key->ps.mono.fbfetch_is_1D) + vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->pos_fixed_pt, 16, 16); + + /* Get the current render target layer index. 
*/ + if (key->ps.mono.fbfetch_layered) + vec[chan++] = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary, 16, 11); + + nir_ssa_def *coords = nir_vec(b, vec, 4); + + enum glsl_sampler_dim dim; + if (key->ps.mono.fbfetch_msaa) + dim = GLSL_SAMPLER_DIM_MS; + else if (key->ps.mono.fbfetch_is_1D) + dim = GLSL_SAMPLER_DIM_1D; + else + dim = GLSL_SAMPLER_DIM_2D; + + nir_ssa_def *sample_id; + if (key->ps.mono.fbfetch_msaa) { + sample_id = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary, 8, 4); + + if (sel->screen->info.gfx_level < GFX11 && + !(sel->screen->debug_flags & DBG(NO_FMASK))) { + nir_ssa_def *desc = + load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0_FMASK, 8); + + nir_ssa_def *fmask = + nir_bindless_image_fragment_mask_load_amd( + b, desc, coords, + .image_dim = dim, + .image_array = key->ps.mono.fbfetch_layered, + .access = ACCESS_CAN_REORDER); + + nir_ssa_def *offset = nir_ishl_imm(b, sample_id, 2); + /* 3 for EQAA handling, see lower_image_to_fragment_mask_load() */ + nir_ssa_def *width = nir_imm_int(b, 3); + sample_id = nir_ubfe(b, fmask, offset, width); + } + } else { + sample_id = zero; + } + + nir_ssa_def *desc = load_internal_binding(b, args, SI_PS_IMAGE_COLORBUF0, 8); + + return nir_bindless_image_load(b, 4, 32, desc, coords, sample_id, zero, + .image_dim = dim, + .image_array = key->ps.mono.fbfetch_layered, + .access = ACCESS_CAN_REORDER); +} + static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_state *s) { if (instr->type != nir_instr_type_intrinsic) @@ -355,6 +421,18 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s } break; } + case nir_intrinsic_load_output: { + nir_io_semantics sem = nir_intrinsic_io_semantics(intrin); + + /* not fbfetch */ + if (!(stage == MESA_SHADER_FRAGMENT && sem.fb_fetch_output)) + return false; + + /* Ignore src0, because KHR_blend_func_extended disallows multiple render targets. 
*/ + + replacement = fetch_framebuffer(b, args, sel, key); + break; + } default: return false; } diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 98fa3c2d8af..961a20ee73c 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -228,7 +228,6 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part bool separate_epilog); void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader *shader); void si_llvm_ps_build_end(struct si_shader_context *ctx); -void si_llvm_init_ps_callbacks(struct si_shader_context *ctx); /* si_shader_llvm_vs.c */ void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key, diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 0c1147fb17a..8e40c0cacf0 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -830,8 +830,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade break; case MESA_SHADER_FRAGMENT: { - si_llvm_init_ps_callbacks(ctx); - unsigned colors_read = ctx->shader->selector->info.colors_read; LLVMValueRef main_fn = ctx->main_fn.value; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c index c43dc18ce44..41cb49a33d4 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c @@ -26,72 +26,6 @@ #include "si_shader_internal.h" #include "sid.h" -static LLVMValueRef si_get_sample_id(struct si_shader_context *ctx) -{ - return si_unpack_param(ctx, ctx->args->ac.ancillary, 8, 4); -} - -static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - struct ac_image_args args = {}; - LLVMValueRef 
ptr, image, fmask; - - /* Ignore src0, because KHR_blend_func_extended disallows multiple render - * targets. - */ - - /* Load the image descriptor. */ - STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0); - STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0_FMASK % 2 == 0); - - ptr = ac_get_arg(&ctx->ac, ctx->args->internal_bindings); - ptr = - LLVMBuildPointerCast(ctx->ac.builder, ptr, ac_array_in_const32_addr_space(ctx->ac.v8i32), ""); - struct ac_llvm_pointer desc = { .v = ptr, .t = ctx->ac.v8i32 }; - - image = ac_build_load_to_sgpr(&ctx->ac, desc, LLVMConstInt(ctx->ac.i32, SI_PS_IMAGE_COLORBUF0 / 2, 0)); - - unsigned chan = 0; - - args.coords[chan++] = si_unpack_param(ctx, ctx->args->pos_fixed_pt, 0, 16); - - if (!ctx->shader->key.ps.mono.fbfetch_is_1D) - args.coords[chan++] = si_unpack_param(ctx, ctx->args->pos_fixed_pt, 16, 16); - - /* Get the current render target layer index. */ - if (ctx->shader->key.ps.mono.fbfetch_layered) - args.coords[chan++] = si_unpack_param(ctx, ctx->args->ac.ancillary, 16, 11); - - if (ctx->shader->key.ps.mono.fbfetch_msaa) - args.coords[chan++] = si_get_sample_id(ctx); - - if (ctx->screen->info.gfx_level < GFX11 && - ctx->shader->key.ps.mono.fbfetch_msaa && - !(ctx->screen->debug_flags & DBG(NO_FMASK))) { - - fmask = ac_build_load_to_sgpr(&ctx->ac, desc, LLVMConstInt(ctx->ac.i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0)); - - ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords, - ctx->shader->key.ps.mono.fbfetch_layered); - } - - args.opcode = ac_image_load; - args.resource = image; - args.dmask = 0xf; - args.attributes = AC_ATTR_INVARIANT_LOAD; - - if (ctx->shader->key.ps.mono.fbfetch_msaa) - args.dim = - ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darraymsaa : ac_image_2dmsaa; - else if (ctx->shader->key.ps.mono.fbfetch_is_1D) - args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_1darray : ac_image_1d; - else - args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? 
ac_image_2darray : ac_image_2d; - - return ac_build_image_opcode(&ctx->ac, &args); -} - static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx, unsigned attr_index, unsigned chan, LLVMValueRef prim_mask, LLVMValueRef i, LLVMValueRef j) @@ -943,8 +877,3 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, main_arg_types, false); } - -void si_llvm_init_ps_callbacks(struct si_shader_context *ctx) -{ - ctx->abi.emit_fbfetch = si_nir_emit_fbfetch; -}
