From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This is in preparation for the new image intrinsics.
---
 src/amd/common/ac_llvm_build.c                     | 101 +++++-
 src/amd/common/ac_llvm_build.h                     |  14 +-
 src/amd/common/ac_nir_to_llvm.c                    | 365 +++++++--------------
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h |   2 +-
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c  | 222 +++++--------
 5 files changed, 295 insertions(+), 409 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 2bf38f809bb..edc729c0127 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -30,20 +30,21 @@ #include "c11/threads.h" #include <assert.h> #include <stdio.h> #include "ac_llvm_util.h" #include "ac_exp_param.h" #include "util/bitscan.h" #include "util/macros.h" #include "util/u_atomic.h" +#include "util/u_math.h" #include "sid.h" #include "shader_enums.h" #define AC_LLVM_INITIAL_CF_DEPTH 4 /* Data for if/else/endif and bgnloop/endloop control flow structures. */ struct ac_llvm_flow { /* Loop exit or next part of if/else/endif. */ @@ -1422,42 +1423,117 @@ void ac_build_export_null(struct ac_llvm_context *ctx) args.target = V_008DFC_SQ_EXP_NULL; args.compr = 0; /* COMPR flag (0 = 32-bit export) */ args.out[0] = LLVMGetUndef(ctx->f32); /* R */ args.out[1] = LLVMGetUndef(ctx->f32); /* G */ args.out[2] = LLVMGetUndef(ctx->f32); /* B */ args.out[3] = LLVMGetUndef(ctx->f32); /* A */ ac_build_export(ctx, &args); } +static unsigned ac_num_coords(enum ac_image_dim dim) +{ + switch (dim) { + case ac_image_1d: + return 1; + case ac_image_2d: + case ac_image_1darray: + return 2; + case ac_image_3d: + case ac_image_cube: + case ac_image_2darray: + case ac_image_2dmsaa: + return 3; + case ac_image_2darraymsaa: + return 4; + default: + unreachable("ac_num_coords: bad dim"); + } +} + +static unsigned ac_num_derivs(enum ac_image_dim dim) +{ + switch (dim) { + case ac_image_1d: + case ac_image_1darray: + return 2; + case ac_image_2d: + case ac_image_2darray: + case ac_image_cube: + return 4; + case ac_image_3d: + return 6; + case ac_image_2dmsaa: + case ac_image_2darraymsaa: + default: + unreachable("derivatives not supported"); + } +} + LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a) { - LLVMValueRef args[11]; - unsigned num_args = 0; + LLVMValueRef args[16]; const char *name = NULL; char intr_name[128], type[64]; + assert(!a->lod 
|| a->lod == ctx->i32_0 || a->lod == ctx->f32_0 || + !a->level_zero); + assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip) || + a->lod); + assert((a->bias ? 1 : 0) + + (a->lod ? 1 : 0) + + (a->level_zero ? 1 : 0) + + (a->derivs[0] ? 1 : 0) <= 1); + bool sample = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || a->opcode == ac_image_get_lod; bool da = a->dim == ac_image_cube || a->dim == ac_image_1darray || a->dim == ac_image_2darray || a->dim == ac_image_2darraymsaa; if (a->opcode == ac_image_get_lod) da = false; + unsigned num_coords = + a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0; + LLVMValueRef addr; + unsigned num_addr = 0; + + if (a->offset) + args[num_addr++] = ac_to_integer(ctx, a->offset); + if (a->bias) + args[num_addr++] = ac_to_integer(ctx, a->bias); + if (a->compare) + args[num_addr++] = ac_to_integer(ctx, a->compare); + if (a->derivs[0]) { + unsigned num_derivs = ac_num_derivs(a->dim); + for (unsigned i = 0; i < num_derivs; ++i) + args[num_addr++] = ac_to_integer(ctx, a->derivs[i]); + } + for (unsigned i = 0; i < num_coords; ++i) + args[num_addr++] = ac_to_integer(ctx, a->coords[i]); + if (a->lod) + args[num_addr++] = ac_to_integer(ctx, a->lod); + + unsigned pad_goal = util_next_power_of_two(num_addr); + while (num_addr < pad_goal) + args[num_addr++] = LLVMGetUndef(ctx->i32); + + addr = ac_build_gather_values(ctx, args, num_addr); + + unsigned num_args = 0; if (sample) - args[num_args++] = ac_to_float(ctx, a->addr); + args[num_args++] = ac_to_float(ctx, addr); else - args[num_args++] = a->addr; + args[num_args++] = ac_to_integer(ctx, addr); args[num_args++] = a->resource; if (sample) args[num_args++] = a->sampler; args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0); if (sample) args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0); args[num_args++] = ctx->i1false; /* glc */ args[num_args++] = ctx->i1false; /* slc */ args[num_args++] = ctx->i1false; /* lwe */ @@ -1482,26 +1558,29 @@ 
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, case ac_image_get_resinfo: name = "llvm.amdgcn.image.getresinfo"; break; default: unreachable("invalid image opcode"); } ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type, sizeof(type)); + bool lod_suffix = + a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4); + snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32", name, a->compare ? ".c" : "", a->bias ? ".b" : - a->lod ? ".l" : - a->deriv ? ".d" : + lod_suffix ? ".l" : + a->derivs[0] ? ".d" : a->level_zero ? ".lz" : "", a->offset ? ".o" : "", type); LLVMValueRef result = ac_build_intrinsic(ctx, intr_name, ctx->v4f32, args, num_args, AC_FUNC_ATTR_READNONE); if (!sample) { result = LLVMBuildBitCast(ctx->builder, result, @@ -2458,26 +2537,24 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, */ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr, bool is_array_tex) { struct ac_image_args fmask_load = {}; fmask_load.opcode = ac_image_load; fmask_load.resource = fmask; fmask_load.dmask = 0xf; fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d; - LLVMValueRef fmask_addr[4]; - memcpy(fmask_addr, addr, sizeof(fmask_addr[0]) * 3); - fmask_addr[3] = LLVMGetUndef(ac->i32); - - fmask_load.addr = ac_build_gather_values(ac, fmask_addr, - is_array_tex ? 4 : 2); + fmask_load.coords[0] = addr[0]; + fmask_load.coords[1] = addr[1]; + if (is_array_tex) + fmask_load.coords[2] = addr[2]; LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load); fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, ac->i32_0, ""); /* Apply the formula. */ unsigned sample_chan = is_array_tex ? 
3 : 2; LLVMValueRef final_sample; final_sample = LLVMBuildMul(ac->builder, addr[sample_chan], LLVMConstInt(ac->i32, 4, 0), ""); diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index a51390794a7..fcd465ef070 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -324,32 +324,32 @@ enum ac_image_dim { ac_image_cube, // includes cube arrays ac_image_1darray, ac_image_2darray, ac_image_2dmsaa, ac_image_2darraymsaa, }; struct ac_image_args { enum ac_image_opcode opcode; enum ac_image_dim dim; - bool level_zero; - bool bias; - bool lod; - bool deriv; - bool compare; - bool offset; LLVMValueRef resource; LLVMValueRef sampler; - LLVMValueRef addr; + LLVMValueRef offset; + LLVMValueRef bias; + LLVMValueRef compare; + LLVMValueRef derivs[6]; + LLVMValueRef coords[4]; + LLVMValueRef lod; // also used by ac_image_get_resinfo unsigned dmask; bool unorm; + bool level_zero; }; LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a); LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index ce7afaf96b4..ddd1265c8da 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1145,61 +1145,53 @@ static void build_int_type_name( LLVMGetVectorSize(type)); else strcpy(buf, "i32"); } static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, struct ac_image_args *args, const nir_tex_instr *instr) { enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); - LLVMValueRef coord = args->addr; LLVMValueRef half_texel[2]; LLVMValueRef compare_cube_wa = NULL; LLVMValueRef result; - int c; - unsigned coord_vgpr_index = (unsigned)args->offset + 
(unsigned)args->compare; //TODO Rect { struct ac_image_args txq_args = { 0 }; txq_args.dim = get_ac_sampler_dim(ctx, instr->sampler_dim, instr->is_array); txq_args.opcode = ac_image_get_resinfo; txq_args.dmask = 0xf; - txq_args.addr = ctx->i32_0; + txq_args.lod = ctx->i32_0; txq_args.resource = args->resource; LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args); - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, false), ""); half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]); half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), ""); } } - LLVMValueRef orig_coords = args->addr; + LLVMValueRef orig_coords[2] = { args->coords[0], args->coords[1] }; - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { LLVMValueRef tmp; - LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); - tmp = LLVMBuildExtractElement(ctx->builder, coord, index, ""); - tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); - tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], ""); - tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); - coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, ""); + tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, ""); + args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], ""); } - /* * Apparantly cube has issue with integer types that the workaround doesn't solve, * so this tests if the format is 8_8_8_8 and an integer type do an alternate * workaround by sampling using a scaled type and converting. * This is taken from amdgpu-pro shaders. */ /* NOTE this produces some ugly code compared to amdgpu-pro, * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select, * and then reads them back. 
-pro generates two selects, * one s_cmp for the descriptor rewriting @@ -1229,111 +1221,101 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false), LLVMConstInt(ctx->i32, 0x14000000, false), ""); /* replace the NUM FORMAT in the descriptor */ tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), ""); tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, ""); args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, ""); /* don't modify the coordinates for this case */ - coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, ""); + for (unsigned c = 0; c < 2; ++c) + args->coords[c] = LLVMBuildSelect( + ctx->builder, compare_cube_wa, + orig_coords[c], args->coords[c], ""); } - args->addr = coord; result = ac_build_image_opcode(ctx, args); if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { LLVMValueRef tmp, tmp2; /* if the cube workaround is in place, f2i the result. 
*/ - for (c = 0; c < 4; c++) { + for (unsigned c = 0; c < 4; c++) { tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), ""); if (stype == GLSL_TYPE_UINT) tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, ""); else tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, ""); tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, ""); tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, ""); tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), ""); } } return result; } static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_tex_instr *instr, - bool lod_is_zero, struct ac_image_args *args) { if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); if (ctx->abi->gfx9_stride_size_workaround) { return ac_build_buffer_load_format_gfx9_safe(&ctx->ac, args->resource, - args->addr, + args->coords[0], ctx->ac.i32_0, util_last_bit(mask), false, true); } else { return ac_build_buffer_load_format(&ctx->ac, args->resource, - args->addr, + args->coords[0], ctx->ac.i32_0, util_last_bit(mask), false, true); } } args->opcode = ac_image_sample; - args->compare = instr->is_shadow; switch (instr->op) { case nir_texop_txf: case nir_texop_txf_ms: case nir_texop_samples_identical: - args->opcode = lod_is_zero || + args->opcode = args->level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? 
ac_image_load : ac_image_load_mip; - args->compare = false; - args->offset = false; - break; - case nir_texop_txb: - args->bias = true; - break; - case nir_texop_txl: - if (lod_is_zero) - args->level_zero = true; - else - args->lod = true; + args->level_zero = false; break; case nir_texop_txs: case nir_texop_query_levels: args->opcode = ac_image_get_resinfo; + if (!args->lod) + args->lod = ctx->ac.i32_0; + args->level_zero = false; break; case nir_texop_tex: - if (ctx->stage != MESA_SHADER_FRAGMENT) + if (ctx->stage != MESA_SHADER_FRAGMENT) { + assert(!args->lod); args->level_zero = true; - break; - case nir_texop_txd: - args->deriv = true; + } break; case nir_texop_tg4: args->opcode = ac_image_gather4; args->level_zero = true; break; case nir_texop_lod: args->opcode = ac_image_get_lod; - args->compare = false; - args->offset = false; break; default: break; } if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) { enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { return lower_gather4_integer(&ctx->ac, args, instr); } @@ -2074,37 +2056,32 @@ glsl_is_array_image(const struct glsl_type *type) * * The sample index should be adjusted as follows: * sample_index = (fmask >> (sample_index * 4)) & 0xF; */ static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx, LLVMValueRef coord_x, LLVMValueRef coord_y, LLVMValueRef coord_z, LLVMValueRef sample_index, LLVMValueRef fmask_desc_ptr) { - LLVMValueRef fmask_load_address[4]; + struct ac_image_args args = {0}; LLVMValueRef res; - fmask_load_address[0] = coord_x; - fmask_load_address[1] = coord_y; - if (coord_z) { - fmask_load_address[2] = coord_z; - fmask_load_address[3] = LLVMGetUndef(ctx->i32); - } - - struct ac_image_args args = {0}; + args.coords[0] = coord_x; + args.coords[1] = coord_y; + if (coord_z) + args.coords[2] = coord_z; args.opcode = ac_image_load; args.dim = coord_z ? 
ac_image_2darray : ac_image_2d; args.resource = fmask_desc_ptr; args.dmask = 0xf; - args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2); res = ac_build_image_opcode(ctx, &args); res = ac_to_integer(ctx, res); LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false); LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false); LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder, res, ctx->i32_0, ""); @@ -2435,21 +2412,21 @@ static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, const nir_variable *var = instr->variables[0]->var; const struct glsl_type *type = glsl_without_array(var->type); struct ac_image_args args = { 0 }; args.dim = get_ac_sampler_dim(&ctx->ac, glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type)); args.dmask = 0xf; args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); args.opcode = ac_image_get_resinfo; - args.addr = ctx->ac.i32_0; + args.lod = ctx->ac.i32_0; return ac_build_image_opcode(&ctx->ac, &args); } static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { LLVMValueRef res; const nir_variable *var = instr->variables[0]->var; const struct glsl_type *type = glsl_without_array(var->type); @@ -2459,21 +2436,21 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, false), true); struct ac_image_args args = { 0 }; args.dim = get_ac_image_dim(&ctx->ac, glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type)); args.dmask = 0xf; args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); args.opcode = ac_image_get_resinfo; - args.addr = ctx->ac.i32_0; + args.lod = ctx->ac.i32_0; res = ac_build_image_opcode(&ctx->ac, &args); LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && glsl_sampler_type_is_array(type)) { LLVMValueRef six = 
LLVMConstInt(ctx->ac.i32, 6, false); LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, ""); z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); @@ -3193,52 +3170,20 @@ static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, } } return ctx->abi->load_sampler_desc(ctx->abi, descriptor_set, base_index, constant_index, index, desc_type, image, write, bindless); } -static void set_tex_fetch_args(struct ac_llvm_context *ctx, - struct ac_image_args *args, - const nir_tex_instr *instr, - nir_texop op, - LLVMValueRef res_ptr, LLVMValueRef samp_ptr, - LLVMValueRef *param, unsigned count, - unsigned dmask) -{ - unsigned is_rect = 0; - - /* Pad to power of two vector */ - while (count < util_next_power_of_two(count)) - param[count++] = LLVMGetUndef(ctx->i32); - - if (count > 1) - args->addr = ac_build_gather_values(ctx, param, count); - else - args->addr = param[0]; - - args->resource = res_ptr; - args->sampler = samp_ptr; - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) { - args->addr = param[0]; - return; - } - - args->dmask = dmask; - args->unorm = is_rect; - args->dim = get_ac_sampler_dim(&ctx->ac, instr->sampler_dim, instr->is_array); -} - /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. * * SI-CI: * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic * filtering manually. The driver sets img7 to a mask clearing * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: * s_and_b32 samp0, samp0, img7 * * VI: * The ANISO_OVERRIDE sampler field enables this fix in TA. 
@@ -3289,87 +3234,84 @@ static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, coord = ac_to_float(ctx, coord); coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0); coord = ac_to_integer(ctx, coord); return coord; } static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) { LLVMValueRef result = NULL; struct ac_image_args args = { 0 }; - unsigned dmask = 0xf; - LLVMValueRef address[16]; - LLVMValueRef coords[5]; - LLVMValueRef coord = NULL, lod = NULL, comparator = NULL; - LLVMValueRef bias = NULL, offsets = NULL; - LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL; + LLVMValueRef fmask_ptr = NULL, sample_index = NULL; LLVMValueRef ddx = NULL, ddy = NULL; - LLVMValueRef derivs[6]; - unsigned chan, count = 0; - unsigned const_src = 0, num_deriv_comp = 0; - bool lod_is_zero = false; + unsigned offset_src = 0; - tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr); + tex_fetch_ptrs(ctx, instr, &args.resource, &args.sampler, &fmask_ptr); for (unsigned i = 0; i < instr->num_srcs; i++) { switch (instr->src[i].src_type) { - case nir_tex_src_coord: - coord = get_src(ctx, instr->src[i].src); + case nir_tex_src_coord: { + LLVMValueRef coord = get_src(ctx, instr->src[i].src); + for (unsigned chan = 0; chan < instr->coord_components; ++chan) + args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); break; + } case nir_tex_src_projector: break; case nir_tex_src_comparator: - comparator = get_src(ctx, instr->src[i].src); + if (instr->is_shadow) + args.compare = get_src(ctx, instr->src[i].src); break; case nir_tex_src_offset: - offsets = get_src(ctx, instr->src[i].src); - const_src = i; + args.offset = get_src(ctx, instr->src[i].src); + offset_src = i; break; case nir_tex_src_bias: - bias = get_src(ctx, instr->src[i].src); + if (instr->op == nir_texop_txb) + args.bias = get_src(ctx, instr->src[i].src); break; case nir_tex_src_lod: { nir_const_value *val = nir_src_as_const_value(instr->src[i].src); 
if (val && val->i32[0] == 0) - lod_is_zero = true; - lod = get_src(ctx, instr->src[i].src); + args.level_zero = true; + else + args.lod = get_src(ctx, instr->src[i].src); break; } case nir_tex_src_ms_index: sample_index = get_src(ctx, instr->src[i].src); break; case nir_tex_src_ms_mcs: break; case nir_tex_src_ddx: ddx = get_src(ctx, instr->src[i].src); - num_deriv_comp = instr->src[i].src.ssa->num_components; break; case nir_tex_src_ddy: ddy = get_src(ctx, instr->src[i].src); break; case nir_tex_src_texture_offset: case nir_tex_src_sampler_offset: case nir_tex_src_plane: default: break; } } if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { - result = get_buffer_size(ctx, res_ptr, true); + result = get_buffer_size(ctx, args.resource, true); goto write_result; } if (instr->op == nir_texop_texture_samples) { LLVMValueRef res, samples, is_msaa; - res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, ""); + res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, ""); samples = LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 3, false), ""); is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 28, false), ""); is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, LLVMConstInt(ctx->ac.i32, 0xe, false), ""); is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa, LLVMConstInt(ctx->ac.i32, 0xe, false), ""); samples = LLVMBuildLShr(ctx->ac.builder, samples, @@ -3377,240 +3319,173 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) samples = LLVMBuildAnd(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 0xf, false), ""); samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, samples, ""); samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, ctx->ac.i32_1, ""); result = samples; goto write_result; } - if (coord) - for (chan = 0; chan < instr->coord_components; chan++) - coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); - - if (offsets 
&& instr->op != nir_texop_txf) { + if (args.offset && instr->op != nir_texop_txf) { LLVMValueRef offset[3], pack; - for (chan = 0; chan < 3; ++chan) + for (unsigned chan = 0; chan < 3; ++chan) offset[chan] = ctx->ac.i32_0; - args.offset = true; - for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) { - offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan); + unsigned num_components = ac_get_llvm_num_components(args.offset); + for (unsigned chan = 0; chan < num_components; chan++) { + offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan); offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); if (chan) offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->ac.i32, chan * 8, false), ""); } pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); - address[count++] = pack; - + args.offset = pack; } - /* pack LOD bias value */ - if (instr->op == nir_texop_txb && bias) { - address[count++] = bias; - } - - /* Pack depth comparison value */ - if (instr->is_shadow && comparator) { - LLVMValueRef z = ac_to_float(&ctx->ac, - ac_llvm_extract_elem(&ctx->ac, comparator, 0)); - - /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT, - * so the depth comparison value isn't clamped for Z16 and - * Z24 anymore. Do it manually here. - * - * It's unnecessary if the original texture format was - * Z32_FLOAT, but we don't know that here. - */ - if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference) - z = ac_build_clamp(&ctx->ac, z); - address[count++] = z; - } + /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT, + * so the depth comparison value isn't clamped for Z16 and + * Z24 anymore. Do it manually here. + * + * It's unnecessary if the original texture format was + * Z32_FLOAT, but we don't know that here. 
+ */ + if (args.compare && ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference) + args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare)); /* pack derivatives */ if (ddx || ddy) { int num_src_deriv_channels, num_dest_deriv_channels; switch (instr->sampler_dim) { case GLSL_SAMPLER_DIM_3D: case GLSL_SAMPLER_DIM_CUBE: - num_deriv_comp = 3; num_src_deriv_channels = 3; num_dest_deriv_channels = 3; break; case GLSL_SAMPLER_DIM_2D: default: num_src_deriv_channels = 2; num_dest_deriv_channels = 2; - num_deriv_comp = 2; break; case GLSL_SAMPLER_DIM_1D: num_src_deriv_channels = 1; if (ctx->ac.chip_class >= GFX9) { num_dest_deriv_channels = 2; - num_deriv_comp = 2; } else { num_dest_deriv_channels = 1; - num_deriv_comp = 1; } break; } for (unsigned i = 0; i < num_src_deriv_channels; i++) { - derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i)); - derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i)); + args.derivs[i] = ac_to_float(&ctx->ac, + ac_llvm_extract_elem(&ctx->ac, ddx, i)); + args.derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, + ac_llvm_extract_elem(&ctx->ac, ddy, i)); } for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) { - derivs[i] = ctx->ac.f32_0; - derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0; + args.derivs[i] = ctx->ac.f32_0; + args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0; } } - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) { - for (chan = 0; chan < instr->coord_components; chan++) - coords[chan] = ac_to_float(&ctx->ac, coords[chan]); + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) { + for (unsigned chan = 0; chan < instr->coord_components; chan++) + args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]); if (instr->coord_components == 3) - coords[3] = LLVMGetUndef(ctx->ac.f32); + args.coords[3] = LLVMGetUndef(ctx->ac.f32); ac_prepare_cube_coords(&ctx->ac, instr->op == 
nir_texop_txd, instr->is_array, - instr->op == nir_texop_lod, coords, derivs); - if (num_deriv_comp) - num_deriv_comp--; + instr->op == nir_texop_lod, args.coords, args.derivs); } - if (ddx || ddy) { - for (unsigned i = 0; i < num_deriv_comp * 2; i++) - address[count++] = derivs[i]; - } - - /* Pack texture coordinates */ - if (coord) { - address[count++] = coords[0]; - if (instr->coord_components > 1) { - if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) { - coords[1] = apply_round_slice(&ctx->ac, coords[1]); - } - address[count++] = coords[1]; - } - if (instr->coord_components > 2) { - if ((instr->sampler_dim == GLSL_SAMPLER_DIM_2D || - instr->sampler_dim == GLSL_SAMPLER_DIM_MS || - instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS || - instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) && - instr->is_array && - instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { - coords[2] = apply_round_slice(&ctx->ac, coords[2]); - } - address[count++] = coords[2]; - } - - if (ctx->ac.chip_class >= GFX9) { - LLVMValueRef filler; - if (instr->op == nir_texop_txf) - filler = ctx->ac.i32_0; - else - filler = LLVMConstReal(ctx->ac.f32, 0.5); - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) { - /* No nir_texop_lod, because it does not take a slice - * even with array textures. 
*/ - if (instr->is_array && instr->op != nir_texop_lod ) { - address[count] = address[count - 1]; - address[count - 1] = filler; - count++; - } else - address[count++] = filler; - } - } + /* Texture coordinates fixups */ + if (instr->coord_components > 2 && + (instr->sampler_dim == GLSL_SAMPLER_DIM_2D || + instr->sampler_dim == GLSL_SAMPLER_DIM_MS || + instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS || + instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) && + instr->is_array && + instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { + args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]); } - /* Pack LOD */ - if (lod && ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && !lod_is_zero)) { - address[count++] = lod; - } else if (instr->op == nir_texop_txf_ms && sample_index) { - address[count++] = sample_index; - } else if(instr->op == nir_texop_txs) { - count = 0; - if (lod) - address[count++] = lod; + if (ctx->ac.chip_class >= GFX9 && + instr->sampler_dim == GLSL_SAMPLER_DIM_1D && + instr->op != nir_texop_lod) { + LLVMValueRef filler; + if (instr->op == nir_texop_txf) + filler = ctx->ac.i32_0; else - address[count++] = ctx->ac.i32_0; - } + filler = LLVMConstReal(ctx->ac.f32, 0.5); - for (chan = 0; chan < count; chan++) { - address[chan] = LLVMBuildBitCast(ctx->ac.builder, - address[chan], ctx->ac.i32, ""); + if (instr->is_array) + args.coords[2] = args.coords[1]; + args.coords[1] = filler; } + /* Pack sample index */ + if (instr->op == nir_texop_txf_ms && sample_index) + args.coords[instr->coord_components] = sample_index; + if (instr->op == nir_texop_samples_identical) { - LLVMValueRef txf_address[4]; struct ac_image_args txf_args = { 0 }; - unsigned txf_count = count; - memcpy(txf_address, address, sizeof(txf_address)); - - if (!instr->is_array) - txf_address[2] = ctx->ac.i32_0; - txf_address[3] = ctx->ac.i32_0; - - set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf, - fmask_ptr, NULL, - txf_address, txf_count, 0xf); + 
memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords)); - result = build_tex_intrinsic(ctx, instr, false, &txf_args); + txf_args.dmask = 0xf; + txf_args.resource = fmask_ptr; + txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d; + result = build_tex_intrinsic(ctx, instr, &txf_args); result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0); goto write_result; } if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS && instr->op != nir_texop_txs) { unsigned sample_chan = instr->is_array ? 3 : 2; - address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac, - address[0], - address[1], - instr->is_array ? address[2] : NULL, - address[sample_chan], - fmask_ptr); + args.coords[sample_chan] = adjust_sample_index_using_fmask( + &ctx->ac, args.coords[0], args.coords[1], + instr->is_array ? args.coords[2] : NULL, + args.coords[sample_chan], fmask_ptr); } - if (offsets && instr->op == nir_texop_txf) { + if (args.offset && instr->op == nir_texop_txf) { nir_const_value *const_offset = - nir_src_as_const_value(instr->src[const_src].src); - int num_offsets = instr->src[const_src].src.ssa->num_components; + nir_src_as_const_value(instr->src[offset_src].src); + int num_offsets = instr->src[offset_src].src.ssa->num_components; assert(const_offset); num_offsets = MIN2(num_offsets, instr->coord_components); - if (num_offsets > 2) - address[2] = LLVMBuildAdd(ctx->ac.builder, - address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), ""); - if (num_offsets > 1) - address[1] = LLVMBuildAdd(ctx->ac.builder, - address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), ""); - address[0] = LLVMBuildAdd(ctx->ac.builder, - address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), ""); - + for (unsigned i = 0; i < num_offsets; ++i) { + args.coords[i] = LLVMBuildAdd( + ctx->ac.builder, args.coords[i], + LLVMConstInt(ctx->ac.i32, const_offset->i32[i], false), ""); + 
} + args.offset = NULL; } /* TODO TG4 support */ + args.dmask = 0xf; if (instr->op == nir_texop_tg4) { if (instr->is_shadow) - dmask = 1; + args.dmask = 1; else - dmask = 1 << instr->component; + args.dmask = 1 << instr->component; } - set_tex_fetch_args(&ctx->ac, &args, instr, instr->op, - res_ptr, samp_ptr, address, count, dmask); - result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args); + if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) + args.dim = get_ac_sampler_dim(&ctx->ac, instr->sampler_dim, instr->is_array); + result = build_tex_intrinsic(ctx, instr, &args); if (instr->op == nir_texop_query_levels) result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), ""); else if (instr->is_shadow && instr->is_new_style_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4) result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); else if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->is_array) { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h index c92517fee28..d30f9da539e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h @@ -43,21 +43,21 @@ struct lp_build_emit_data { * order of the arguments should be as follows: * SOA: s0.x, s0.y, s0.z, s0.w, s1.x, s1.y, s1.z, s1.w, s2.x, s2.y, s2.x, s2.w * AOS: s0.xyzw, s1.xyzw, s2.xyzw * TEXTURE Instructions: coord.xyzw * * Arguments should be packed into the args array. For example an SOA * instructions that reads s0.x and s1.x args should look like this: * args[0] = s0.x; * args[1] = s1.x; */ - LLVMValueRef args[12]; + LLVMValueRef args[18]; /** * Number of arguments in the args array. */ unsigned arg_count; /** * The type output type of the opcode. This should be set in the * lp_build_tgsi_action::fetch_args function. 
*/ diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index a54db9e8596..1c653839aea 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -1001,46 +1001,30 @@ static void atomic_emit( } tmp = lp_build_intrinsic( builder, intrinsic_name, ctx->i32, emit_data->args, emit_data->arg_count, 0); emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp); } static void set_tex_fetch_args(struct si_shader_context *ctx, struct lp_build_emit_data *emit_data, - unsigned target, - LLVMValueRef res_ptr, LLVMValueRef samp_ptr, - LLVMValueRef *param, unsigned count, - unsigned dmask) + struct ac_image_args *args, + unsigned target) { - struct ac_image_args args = {}; - - /* Pad to power of two vector */ - while (count < util_next_power_of_two(count)) - param[count++] = LLVMGetUndef(ctx->i32); - - if (count > 1) - args.addr = lp_build_gather_values(&ctx->gallivm, param, count); - else - args.addr = param[0]; - - args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target); - args.resource = res_ptr; - args.sampler = samp_ptr; - args.dmask = dmask; - args.unorm = target == TGSI_TEXTURE_RECT || - target == TGSI_TEXTURE_SHADOWRECT; + args->dim = ac_texture_dim_from_tgsi_target(ctx->screen, target); + args->unorm = target == TGSI_TEXTURE_RECT || + target == TGSI_TEXTURE_SHADOWRECT; /* Ugly, but we seem to have no other choice right now. */ - STATIC_ASSERT(sizeof(args) <= sizeof(emit_data->args)); - memcpy(emit_data->args, &args, sizeof(args)); + STATIC_ASSERT(sizeof(*args) <= sizeof(emit_data->args)); + memcpy(emit_data->args, args, sizeof(*args)); } static LLVMValueRef fix_resinfo(struct si_shader_context *ctx, unsigned target, LLVMValueRef out) { LLVMBuilderRef builder = ctx->ac.builder; /* 1D textures are allocated and used as 2D on GFX9. 
*/ if (ctx->screen->info.chip_class >= GFX9 && (target == TGSI_TEXTURE_1D_ARRAY || @@ -1076,33 +1060,33 @@ static void resq_fetch_args( emit_data->dst_type = ctx->v4i32; if (reg->Register.File == TGSI_FILE_BUFFER) { emit_data->args[0] = shader_buffer_fetch_rsrc(ctx, reg, false); emit_data->arg_count = 1; } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture, &emit_data->args[0]); emit_data->arg_count = 1; } else { - LLVMValueRef res_ptr; + struct ac_image_args args = {}; unsigned image_target; if (inst->Memory.Texture == TGSI_TEXTURE_3D) image_target = TGSI_TEXTURE_2D_ARRAY; else image_target = inst->Memory.Texture; image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture, - &res_ptr); - set_tex_fetch_args(ctx, emit_data, image_target, - res_ptr, NULL, &ctx->i32_0, 1, - 0xf); + &args.resource); + args.lod = ctx->i32_0; + args.dmask = 0xf; + set_tex_fetch_args(ctx, emit_data, &args, image_target); } } static void resq_emit( const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMBuilderRef builder = ctx->ac.builder; @@ -1255,36 +1239,35 @@ static void tex_fetch_ptrs( } } static void txq_fetch_args( struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); const struct tgsi_full_instruction *inst = emit_data->inst; unsigned target = inst->Texture.Texture; - LLVMValueRef res_ptr; - LLVMValueRef address; + struct ac_image_args args = {}; - tex_fetch_ptrs(bld_base, emit_data, &res_ptr, NULL, NULL); + tex_fetch_ptrs(bld_base, emit_data, &args.resource, NULL, NULL); if (target == TGSI_TEXTURE_BUFFER) { /* Read the size from the buffer descriptor directly. 
*/ - emit_data->args[0] = get_buffer_size(bld_base, res_ptr); + emit_data->args[0] = get_buffer_size(bld_base, args.resource); return; } /* Textures - set the mip level. */ - address = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); + args.lod = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); + args.dmask = 0xf; - set_tex_fetch_args(ctx, emit_data, target, res_ptr, - NULL, &address, 1, 0xf); + set_tex_fetch_args(ctx, emit_data, &args, target); } static void txq_emit(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); struct ac_image_args args; unsigned target = emit_data->inst->Texture.Texture; @@ -1303,57 +1286,51 @@ static void txq_emit(const struct lp_build_tgsi_action *action, } static void tex_fetch_args( struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); const struct tgsi_full_instruction *inst = emit_data->inst; unsigned opcode = inst->Instruction.Opcode; unsigned target = inst->Texture.Texture; - LLVMValueRef coords[5], derivs[6]; - LLVMValueRef address[16]; - unsigned num_coords = tgsi_util_get_texture_coord_dim(target); + struct ac_image_args args = {}; int ref_pos = tgsi_util_get_shadow_ref_src_index(target); - unsigned count = 0; unsigned chan; - unsigned num_deriv_channels = 0; bool has_offset = inst->Texture.NumOffsets > 0; - LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL; - unsigned dmask = 0xf; + LLVMValueRef fmask_ptr = NULL; - tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr); + tex_fetch_ptrs(bld_base, emit_data, &args.resource, &args.sampler, &fmask_ptr); if (target == TGSI_TEXTURE_BUFFER) { emit_data->dst_type = ctx->v4f32; - emit_data->args[0] = res_ptr; + emit_data->args[0] = args.resource; emit_data->args[1] = ctx->i32_0; emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 
TGSI_CHAN_X); emit_data->arg_count = 3; return; } /* Fetch and project texture coordinates */ - coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); + args.coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); for (chan = 0; chan < 3; chan++) { - coords[chan] = lp_build_emit_fetch(bld_base, + args.coords[chan] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, chan); if (opcode == TGSI_OPCODE_TXP) - coords[chan] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_DIV, - coords[chan], - coords[3]); + args.coords[chan] = lp_build_emit_llvm_binary( + bld_base, TGSI_OPCODE_DIV, + args.coords[chan], args.coords[3]); } if (opcode == TGSI_OPCODE_TXP) - coords[3] = ctx->ac.f32_1; + args.coords[3] = ctx->ac.f32_1; /* Pack offsets. */ if (has_offset && opcode != TGSI_OPCODE_TXF && opcode != TGSI_OPCODE_TXF_LZ) { /* The offsets are six-bit signed integers packed like this: * X=[5:0], Y=[13:8], and Z=[21:16]. */ LLVMValueRef offset[3], pack; @@ -1364,237 +1341,222 @@ static void tex_fetch_args( emit_data->inst, 0, chan); offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->i32, 0x3f, 0), ""); if (chan) offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->i32, chan*8, 0), ""); } pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); - address[count++] = pack; + args.offset = pack; } /* Pack LOD bias value */ if (opcode == TGSI_OPCODE_TXB) - address[count++] = coords[3]; + args.bias = args.coords[3]; if (opcode == TGSI_OPCODE_TXB2) - address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); + args.bias = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); /* Pack depth comparison value */ if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) { LLVMValueRef z; if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); } else { assert(ref_pos >= 0); - z 
= coords[ref_pos]; + z = args.coords[ref_pos]; } /* Section 8.23.1 (Depth Texture Comparison Mode) of the * OpenGL 4.5 spec says: * * "If the texture’s internal format indicates a fixed-point * depth texture, then D_t and D_ref are clamped to the * range [0, 1]; otherwise no clamping is performed." * * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT, * so the depth comparison value isn't clamped for Z16 and * Z24 anymore. Do it manually here. */ if (ctx->screen->info.chip_class >= VI) { LLVMValueRef upgraded; LLVMValueRef clamped; - upgraded = LLVMBuildExtractElement(ctx->ac.builder, samp_ptr, + upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, LLVMConstInt(ctx->i32, 3, false), ""); upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, LLVMConstInt(ctx->i32, 29, false), ""); upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->i1, ""); clamped = ac_build_clamp(&ctx->ac, z); z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, ""); } - address[count++] = z; + args.compare = z; } /* Pack user derivatives */ if (opcode == TGSI_OPCODE_TXD) { int param, num_src_deriv_channels, num_dst_deriv_channels; switch (target) { case TGSI_TEXTURE_3D: num_src_deriv_channels = 3; num_dst_deriv_channels = 3; - num_deriv_channels = 3; break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_SHADOWRECT: case TGSI_TEXTURE_2D_ARRAY: case TGSI_TEXTURE_SHADOW2D_ARRAY: num_src_deriv_channels = 2; num_dst_deriv_channels = 2; - num_deriv_channels = 2; break; case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_SHADOWCUBE: case TGSI_TEXTURE_CUBE_ARRAY: case TGSI_TEXTURE_SHADOWCUBE_ARRAY: /* Cube derivatives will be converted to 2D. */ num_src_deriv_channels = 3; num_dst_deriv_channels = 3; - num_deriv_channels = 2; break; case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: case TGSI_TEXTURE_1D_ARRAY: case TGSI_TEXTURE_SHADOW1D_ARRAY: num_src_deriv_channels = 1; /* 1D textures are allocated and used as 2D on GFX9. 
*/ if (ctx->screen->info.chip_class >= GFX9) { num_dst_deriv_channels = 2; - num_deriv_channels = 2; } else { num_dst_deriv_channels = 1; - num_deriv_channels = 1; } break; default: unreachable("invalid target"); } for (param = 0; param < 2; param++) { for (chan = 0; chan < num_src_deriv_channels; chan++) - derivs[param * num_dst_deriv_channels + chan] = + args.derivs[param * num_dst_deriv_channels + chan] = lp_build_emit_fetch(bld_base, inst, param+1, chan); /* Fill in the rest with zeros. */ for (chan = num_src_deriv_channels; chan < num_dst_deriv_channels; chan++) - derivs[param * num_dst_deriv_channels + chan] = + args.derivs[param * num_dst_deriv_channels + chan] = ctx->ac.f32_0; } } if (target == TGSI_TEXTURE_CUBE || target == TGSI_TEXTURE_CUBE_ARRAY || target == TGSI_TEXTURE_SHADOWCUBE || target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { ac_prepare_cube_coords(&ctx->ac, opcode == TGSI_OPCODE_TXD, target == TGSI_TEXTURE_CUBE_ARRAY || target == TGSI_TEXTURE_SHADOWCUBE_ARRAY, opcode == TGSI_OPCODE_LODQ, - coords, derivs); + args.coords, args.derivs); } else if (tgsi_is_array_sampler(target) && opcode != TGSI_OPCODE_TXF && opcode != TGSI_OPCODE_TXF_LZ && ctx->screen->info.chip_class <= VI) { unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2; - coords[array_coord] = + args.coords[array_coord] = ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, - &coords[array_coord], 1, 0); + &args.coords[array_coord], 1, 0); } - if (opcode == TGSI_OPCODE_TXD) - for (int i = 0; i < num_deriv_channels * 2; i++) - address[count++] = derivs[i]; - - /* Pack texture coordinates */ - address[count++] = coords[0]; - if (num_coords > 1) - address[count++] = coords[1]; - if (num_coords > 2) - address[count++] = coords[2]; - /* 1D textures are allocated and used as 2D on GFX9. */ if (ctx->screen->info.chip_class >= GFX9) { LLVMValueRef filler; /* Use 0.5, so that we don't sample the border color. 
*/ if (opcode == TGSI_OPCODE_TXF || opcode == TGSI_OPCODE_TXF_LZ) filler = ctx->i32_0; else filler = LLVMConstReal(ctx->f32, 0.5); if (target == TGSI_TEXTURE_1D || target == TGSI_TEXTURE_SHADOW1D) { - address[count++] = filler; + args.coords[1] = filler; } else if (target == TGSI_TEXTURE_1D_ARRAY || target == TGSI_TEXTURE_SHADOW1D_ARRAY) { - address[count] = address[count - 1]; - address[count - 1] = filler; - count++; + args.coords[2] = args.coords[1]; + args.coords[1] = filler; } } /* Pack LOD or sample index */ - if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF) - address[count++] = coords[3]; + if (opcode == TGSI_OPCODE_TXL) + args.lod = args.coords[3]; else if (opcode == TGSI_OPCODE_TXL2) - address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); - - if (count > 16) { - assert(!"Cannot handle more than 16 texture address parameters"); - count = 16; + args.lod = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); + else if (opcode == TGSI_OPCODE_TXF) { + if (target == TGSI_TEXTURE_2D_MSAA) { + /* No LOD, but move sample index into the right place. 
*/ + args.coords[2] = args.coords[3]; + } else if (target != TGSI_TEXTURE_2D_ARRAY_MSAA) { + args.lod = args.coords[3]; + } } - for (chan = 0; chan < count; chan++) - address[chan] = ac_to_integer(&ctx->ac, address[chan]); - if (target == TGSI_TEXTURE_2D_MSAA || target == TGSI_TEXTURE_2D_ARRAY_MSAA) { - ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, address, + ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, args.coords, target == TGSI_TEXTURE_2D_ARRAY_MSAA); } if (opcode == TGSI_OPCODE_TXF || opcode == TGSI_OPCODE_TXF_LZ) { /* add tex offsets */ if (inst->Texture.NumOffsets) { struct lp_build_context *uint_bld = &bld_base->uint_bld; const struct tgsi_texture_offset *off = inst->TexOffsets; assert(inst->Texture.NumOffsets == 1); switch (target) { case TGSI_TEXTURE_3D: - address[2] = lp_build_add(uint_bld, address[2], + args.coords[2] = lp_build_add(uint_bld, args.coords[2], ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ]); /* fall through */ case TGSI_TEXTURE_2D: case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_SHADOWRECT: case TGSI_TEXTURE_2D_ARRAY: case TGSI_TEXTURE_SHADOW2D_ARRAY: - address[1] = - lp_build_add(uint_bld, address[1], + args.coords[1] = + lp_build_add(uint_bld, args.coords[1], ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY]); /* fall through */ case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: case TGSI_TEXTURE_1D_ARRAY: case TGSI_TEXTURE_SHADOW1D_ARRAY: - address[0] = - lp_build_add(uint_bld, address[0], + args.coords[0] = + lp_build_add(uint_bld, args.coords[0], ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX]); break; /* texture offsets do not apply to other texture targets */ } } } + args.dmask = 0xf; + if (opcode == TGSI_OPCODE_TG4) { unsigned gather_comp = 0; /* DMASK was repurposed for GATHER4. 4 components are always * returned and DMASK works like a swizzle - it selects * the component to fetch. The only valid DMASK values are * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 
1 returns * (red,red,red,red) etc.) The ISA document doesn't mention * this. */ @@ -1604,25 +1566,24 @@ static void tex_fetch_args( LLVMValueRef comp_imm; struct tgsi_src_register src1 = inst->Src[1].Register; assert(src1.File == TGSI_FILE_IMMEDIATE); comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX]; gather_comp = LLVMConstIntGetZExtValue(comp_imm); gather_comp = CLAMP(gather_comp, 0, 3); } - dmask = 1 << gather_comp; + args.dmask = 1 << gather_comp; } - set_tex_fetch_args(ctx, emit_data, target, res_ptr, - samp_ptr, address, count, dmask); + set_tex_fetch_args(ctx, emit_data, &args, target); } /* Gather4 should follow the same rules as bilinear filtering, but the hardware * incorrectly forces nearest filtering if the texture format is integer. * The only effect it has on Gather4, which always returns 4 texels for * bilinear filtering, is that the final coordinates are off by 0.5 of * the texel size. * * The workaround is to subtract 0.5 from the unnormalized coordinates, * or (0.5 / size) from the normalized coordinates. @@ -1634,28 +1595,21 @@ static void tex_fetch_args( * descriptor was overridden (and hence a fixup of the sampler result is needed). */ static LLVMValueRef si_lower_gather4_integer(struct si_shader_context *ctx, struct ac_image_args *args, unsigned target, enum tgsi_return_type return_type) { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef wa_8888 = NULL; - LLVMValueRef coord = args->addr; LLVMValueRef half_texel[2]; - /* Texture coordinates start after: - * {offset, bias, z-compare, derivatives} - * Only the offset and z-compare can occur here. 
- */ - unsigned coord_vgpr_index = (int)args->offset + (int)args->compare; - int c; assert(return_type == TGSI_RETURN_TYPE_SINT || return_type == TGSI_RETURN_TYPE_UINT); if (target == TGSI_TEXTURE_CUBE || target == TGSI_TEXTURE_CUBE_ARRAY) { LLVMValueRef formats; LLVMValueRef data_format; LLVMValueRef wa_formats; @@ -1684,76 +1638,73 @@ si_lower_gather4_integer(struct si_shader_context *ctx, args->resource = LLVMBuildInsertElement( builder, args->resource, formats, ctx->i32_1, ""); } if (target == TGSI_TEXTURE_RECT || target == TGSI_TEXTURE_SHADOWRECT) { assert(!wa_8888); half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5); } else { struct tgsi_full_instruction txq_inst = {}; + struct ac_image_args txq_args = {}; struct lp_build_emit_data txq_emit_data = {}; struct lp_build_if_state if_ctx; if (wa_8888) { /* Skip the texture size query entirely if we don't need it. */ lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, "")); } /* Query the texture size. */ txq_inst.Texture.Texture = target; txq_emit_data.inst = &txq_inst; txq_emit_data.dst_type = ctx->v4i32; - set_tex_fetch_args(ctx, &txq_emit_data, target, - args->resource, NULL, &ctx->i32_0, - 1, 0xf); + txq_args.resource = args->resource; + txq_args.sampler = args->sampler; + txq_args.lod = ctx->ac.i32_0; + txq_args.dmask = 0xf; + set_tex_fetch_args(ctx, &txq_emit_data, &txq_args, target); txq_emit(NULL, &ctx->bld_base, &txq_emit_data); /* Compute -0.5 / size. 
*/ - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(builder, txq_emit_data.output[0], LLVMConstInt(ctx->i32, c, 0), ""); half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, ""); half_texel[c] = lp_build_emit_llvm_unary(&ctx->bld_base, TGSI_OPCODE_RCP, half_texel[c]); half_texel[c] = LLVMBuildFMul(builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), ""); } if (wa_8888) { lp_build_endif(&if_ctx); LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block }; - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 }; half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2, values, bb); } } } - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { LLVMValueRef tmp; - LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); - - tmp = LLVMBuildExtractElement(builder, coord, index, ""); - tmp = ac_to_float(&ctx->ac, tmp); + tmp = ac_to_float(&ctx->ac, args->coords[c]); tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], ""); - tmp = ac_to_integer(&ctx->ac, tmp); - coord = LLVMBuildInsertElement(builder, coord, tmp, index, ""); + args->coords[c] = ac_to_integer(&ctx->ac, tmp); } - args->addr = coord; - return wa_8888; } /* The second half of the cube texture 8_8_8_8 integer workaround: adjust the * result after the gather operation. 
*/ static LLVMValueRef si_fix_gather4_integer_result(struct si_shader_context *ctx, LLVMValueRef result, enum tgsi_return_type return_type, @@ -1804,58 +1755,49 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, emit_data->args[1], num_channels, false, true); emit_data->output[emit_data->chan] = ac_build_expand_to_vec4(&ctx->ac, result, num_channels); return; } memcpy(&args, emit_data->args, sizeof(args)); /* ugly */ args.opcode = ac_image_sample; - args.compare = tgsi_is_shadow_target(target); - args.offset = inst->Texture.NumOffsets > 0; switch (opcode) { case TGSI_OPCODE_TXF: case TGSI_OPCODE_TXF_LZ: args.opcode = opcode == TGSI_OPCODE_TXF_LZ || target == TGSI_TEXTURE_2D_MSAA || target == TGSI_TEXTURE_2D_ARRAY_MSAA ? ac_image_load : ac_image_load_mip; - args.compare = false; - args.offset = false; break; case TGSI_OPCODE_LODQ: args.opcode = ac_image_get_lod; - args.compare = false; - args.offset = false; break; case TGSI_OPCODE_TEX: case TGSI_OPCODE_TEX2: case TGSI_OPCODE_TXP: if (ctx->type != PIPE_SHADER_FRAGMENT) args.level_zero = true; break; case TGSI_OPCODE_TEX_LZ: args.level_zero = true; break; case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXB2: assert(ctx->type == PIPE_SHADER_FRAGMENT); - args.bias = true; break; case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXL2: - args.lod = true; break; case TGSI_OPCODE_TXD: - args.deriv = true; break; case TGSI_OPCODE_TG4: args.opcode = ac_image_gather4; args.level_zero = true; break; default: assert(0); return; } @@ -1890,21 +1832,20 @@ static void si_llvm_emit_txqs( const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef res, samples; LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL; tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr); - /* Read the samples from the descriptor directly. 
*/ res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->v8i32, ""); samples = LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->i32, 3, 0), ""); samples = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->i32, 16, 0), ""); samples = LLVMBuildAnd(ctx->ac.builder, samples, LLVMConstInt(ctx->i32, 0xf, 0), ""); samples = LLVMBuildShl(ctx->ac.builder, ctx->i32_1, samples, ""); @@ -1925,50 +1866,43 @@ static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action, */ /* Load the image descriptor. */ STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0); ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers); ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr, ac_array_in_const32_addr_space(ctx->v8i32), ""); image = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0)); - LLVMValueRef addr[4]; unsigned chan = 0; - addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16); + args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16); if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D) - addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16); + args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16); /* Get the current render target layer index. 
*/ if (ctx->shader->key.mono.u.ps.fbfetch_layered) - addr[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11); + args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11); if (ctx->shader->key.mono.u.ps.fbfetch_msaa) - addr[chan++] = si_get_sample_id(ctx); - - while (chan < 4) - addr[chan++] = LLVMGetUndef(ctx->i32); + args.coords[chan++] = si_get_sample_id(ctx); if (ctx->shader->key.mono.u.ps.fbfetch_msaa) { fmask = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0)); - ac_apply_fmask_to_sample(&ctx->ac, fmask, addr, false); + ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords, false); } - addr_vec = ac_build_gather_values(&ctx->ac, addr, ARRAY_SIZE(addr)); - args.opcode = ac_image_load; args.resource = image; - args.addr = addr_vec; args.dmask = 0xf; if (ctx->shader->key.mono.u.ps.fbfetch_msaa) args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_2darraymsaa : ac_image_2dmsaa; else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D) args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_1darray : ac_image_1d; else args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_2darray : ac_image_2d; -- 2.14.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev