From: Nicolai Hähnle <nicolai.haeh...@amd.com>

In preparation for dimension-aware LLVM image intrinsics.
---
 src/amd/common/ac_llvm_build.c                     | 105 +++++++---
 src/amd/common/ac_llvm_build.h                     |  37 +++-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h |   2 +-
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c  | 230 +++++++++------------
 4 files changed, 210 insertions(+), 164 deletions(-)
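Reviewer aid, not part of the patch: after this change, image load, store and
atomic emission all funnel through ac_build_image_opcode(), with the per-opcode
differences carried in struct ac_image_args. A minimal sketch of the new
calling convention for a plain image load, mirroring the load_emit hunk below;
the wrapper name and the rsrc/coords/coherent/can_speculate parameters are
placeholders, not code from this series:

/* Sketch only: emitting an image load through the unified helper.
 * emit_image_load_sketch is hypothetical; rsrc, coords (4 entries),
 * coherent and can_speculate stand in for values the caller has. */
static LLVMValueRef
emit_image_load_sketch(struct si_shader_context *ctx, LLVMValueRef rsrc,
		       LLVMValueRef coords[4], bool coherent,
		       bool can_speculate)
{
	struct ac_image_args args = {};

	args.opcode = ac_image_load;
	args.resource = rsrc;		/* v8i32 image descriptor */
	memcpy(args.coords, coords, sizeof(args.coords));
	args.dim = ac_image_2d;		/* e.g.; radeonsi derives this from
					 * the TGSI target, see below */
	args.dmask = 0xf;
	if (coherent)
		args.cache_policy = ac_glc;
	args.attributes = ac_get_load_intr_attribs(can_speculate);

	return ac_build_image_opcode(&ctx->ac, &args);
}

The power-of-two padding of the coordinate vector still happens inside the
helper, so callers only fill the meaningful components.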
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index edc729c0127..431ec1defb9 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1466,35 +1466,40 @@ static unsigned ac_num_derivs(enum ac_image_dim dim)
 	case ac_image_2darraymsaa:
 	default:
 		unreachable("derivatives not supported");
 	}
 }
 
 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 				   struct ac_image_args *a)
 {
 	LLVMValueRef args[16];
+	LLVMTypeRef retty = ctx->v4f32;
 	const char *name = NULL;
-	char intr_name[128], type[64];
+	const char *atomic_subop = "";
+	char intr_name[128], coords_type[64];
 
 	assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 ||
 	       !a->level_zero);
-	assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip) ||
+	assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip &&
+		a->opcode != ac_image_store_mip) ||
 	       a->lod);
 	assert((a->bias ? 1 : 0) +
 	       (a->lod ? 1 : 0) +
 	       (a->level_zero ? 1 : 0) +
 	       (a->derivs[0] ? 1 : 0) <= 1);
 
 	bool sample = a->opcode == ac_image_sample ||
 		      a->opcode == ac_image_gather4 ||
 		      a->opcode == ac_image_get_lod;
+	bool atomic = a->opcode == ac_image_atomic ||
+		      a->opcode == ac_image_atomic_cmpswap;
 	bool da = a->dim == ac_image_cube ||
 		  a->dim == ac_image_1darray ||
 		  a->dim == ac_image_2darray ||
 		  a->dim == ac_image_2darraymsaa;
 
 	if (a->opcode == ac_image_get_lod)
 		da = false;
 
 	unsigned num_coords =
 		a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0;
 	LLVMValueRef addr;
@@ -1516,80 +1521,126 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 	if (a->lod)
 		args[num_addr++] = ac_to_integer(ctx, a->lod);
 
 	unsigned pad_goal = util_next_power_of_two(num_addr);
 	while (num_addr < pad_goal)
 		args[num_addr++] = LLVMGetUndef(ctx->i32);
 
 	addr = ac_build_gather_values(ctx, args, num_addr);
 
 	unsigned num_args = 0;
+	if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
+		args[num_args++] = a->data[0];
+		if (a->opcode == ac_image_atomic_cmpswap)
+			args[num_args++] = a->data[1];
+	}
+
+	unsigned coords_arg = num_args;
 	if (sample)
 		args[num_args++] = ac_to_float(ctx, addr);
 	else
 		args[num_args++] = ac_to_integer(ctx, addr);
 
 	args[num_args++] = a->resource;
 	if (sample)
 		args[num_args++] = a->sampler;
-	args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
-	if (sample)
-		args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
-	args[num_args++] = ctx->i1false; /* glc */
-	args[num_args++] = ctx->i1false; /* slc */
-	args[num_args++] = ctx->i1false; /* lwe */
-	args[num_args++] = LLVMConstInt(ctx->i1, da, 0);
+	if (!atomic) {
+		args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
+		if (sample)
+			args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
+		args[num_args++] = a->cache_policy & ac_glc ? ctx->i1true : ctx->i1false;
+		args[num_args++] = a->cache_policy & ac_slc ? ctx->i1true : ctx->i1false;
+		args[num_args++] = ctx->i1false; /* lwe */
+		args[num_args++] = LLVMConstInt(ctx->i1, da, 0);
+	} else {
+		args[num_args++] = ctx->i1false; /* r128 */
+		args[num_args++] = LLVMConstInt(ctx->i1, da, 0);
+		args[num_args++] = a->cache_policy & ac_slc ?
+			ctx->i1true : ctx->i1false;
+	}
 
 	switch (a->opcode) {
 	case ac_image_sample:
 		name = "llvm.amdgcn.image.sample";
 		break;
 	case ac_image_gather4:
 		name = "llvm.amdgcn.image.gather4";
 		break;
 	case ac_image_load:
 		name = "llvm.amdgcn.image.load";
 		break;
 	case ac_image_load_mip:
 		name = "llvm.amdgcn.image.load.mip";
 		break;
+	case ac_image_store:
+		name = "llvm.amdgcn.image.store";
+		retty = ctx->voidt;
+		break;
+	case ac_image_store_mip:
+		name = "llvm.amdgcn.image.store.mip";
+		retty = ctx->voidt;
+		break;
+	case ac_image_atomic:
+	case ac_image_atomic_cmpswap:
+		name = "llvm.amdgcn.image.atomic.";
+		retty = ctx->i32;
+		if (a->opcode == ac_image_atomic_cmpswap) {
+			atomic_subop = "cmpswap";
+		} else {
+			switch (a->atomic) {
+			case ac_atomic_swap: atomic_subop = "swap"; break;
+			case ac_atomic_add: atomic_subop = "add"; break;
+			case ac_atomic_sub: atomic_subop = "sub"; break;
+			case ac_atomic_smin: atomic_subop = "smin"; break;
+			case ac_atomic_umin: atomic_subop = "umin"; break;
+			case ac_atomic_smax: atomic_subop = "smax"; break;
+			case ac_atomic_umax: atomic_subop = "umax"; break;
+			case ac_atomic_and: atomic_subop = "and"; break;
+			case ac_atomic_or: atomic_subop = "or"; break;
+			case ac_atomic_xor: atomic_subop = "xor"; break;
+			}
+		}
+		break;
 	case ac_image_get_lod:
 		name = "llvm.amdgcn.image.getlod";
 		break;
 	case ac_image_get_resinfo:
 		name = "llvm.amdgcn.image.getresinfo";
 		break;
 	default:
 		unreachable("invalid image opcode");
 	}
 
-	ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type,
-				    sizeof(type));
+	ac_build_type_name_for_intr(LLVMTypeOf(args[coords_arg]), coords_type,
+				    sizeof(coords_type));
 
-	bool lod_suffix =
-		a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
-
-	snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32",
-		 name,
-		 a->compare ? ".c" : "",
-		 a->bias ? ".b" :
-		 lod_suffix ? ".l" :
-		 a->derivs[0] ? ".d" :
-		 a->level_zero ? ".lz" : "",
-		 a->offset ? ".o" : "",
-		 type);
+	if (atomic) {
+		snprintf(intr_name, sizeof(intr_name), "llvm.amdgcn.image.atomic.%s.%s",
+			 atomic_subop, coords_type);
+	} else {
+		bool lod_suffix =
+			a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
+
+		snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32",
+			 name,
+			 a->compare ? ".c" : "",
+			 a->bias ? ".b" :
+			 lod_suffix ? ".l" :
+			 a->derivs[0] ? ".d" :
+			 a->level_zero ? ".lz" : "",
+			 a->offset ? ".o" : "",
+			 coords_type);
+	}
 
 	LLVMValueRef result =
-		ac_build_intrinsic(ctx, intr_name,
-				   ctx->v4f32, args, num_args,
-				   AC_FUNC_ATTR_READNONE);
-	if (!sample) {
+		ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
+				   a->attributes);
+	if (!sample && retty == ctx->v4f32) {
 		result = LLVMBuildBitCast(ctx->builder, result,
 					  ctx->v4i32, "");
 	}
 
 	return result;
 }
 
 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
 				    LLVMValueRef args[2])
 {
 	if (HAVE_LLVM >= 0x0500) {
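Again a reviewer sketch rather than patch content: the atomic path above drops
dmask/unorm/lwe, switches the return type to i32, and derives the intrinsic
name from the new ac_atomic_op subop. A hedged compare-and-swap example under
the same placeholder assumptions as the earlier load sketch:

/* Sketch only: image atomic compare-and-swap via the unified helper.
 * Per the ac_image_args comment, data[0] is the source value and
 * data[1] the comparison value for cmpswap. */
static LLVMValueRef
emit_image_cmpswap_sketch(struct ac_llvm_context *ac, LLVMValueRef src,
			  LLVMValueRef cmp, LLVMValueRef rsrc,
			  LLVMValueRef coords[4])
{
	struct ac_image_args args = {};

	args.opcode = ac_image_atomic_cmpswap;
	args.data[0] = src;
	args.data[1] = cmp;
	args.resource = rsrc;
	memcpy(args.coords, coords, sizeof(args.coords));
	args.dim = ac_image_2d;		/* placeholder dimension */

	/* Resolves to "llvm.amdgcn.image.atomic.cmpswap.<coords type>". */
	return ac_build_image_opcode(ac, &args);
}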
".o" : "", + coords_type); + } LLVMValueRef result = - ac_build_intrinsic(ctx, intr_name, - ctx->v4f32, args, num_args, - AC_FUNC_ATTR_READNONE); - if (!sample) { + ac_build_intrinsic(ctx, intr_name, retty, args, num_args, + a->attributes); + if (!sample && retty == ctx->v4f32) { result = LLVMBuildBitCast(ctx->builder, result, ctx->v4i32, ""); } return result; } LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2]) { if (HAVE_LLVM >= 0x0500) { diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index fcd465ef070..6869ac68ab4 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -306,50 +306,77 @@ struct ac_export_args { void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a); void ac_build_export_null(struct ac_llvm_context *ctx); enum ac_image_opcode { ac_image_sample, ac_image_gather4, ac_image_load, ac_image_load_mip, + ac_image_store, + ac_image_store_mip, ac_image_get_lod, ac_image_get_resinfo, + ac_image_atomic, + ac_image_atomic_cmpswap, +}; + +enum ac_atomic_op { + ac_atomic_swap, + ac_atomic_add, + ac_atomic_sub, + ac_atomic_smin, + ac_atomic_umin, + ac_atomic_smax, + ac_atomic_umax, + ac_atomic_and, + ac_atomic_or, + ac_atomic_xor, }; enum ac_image_dim { ac_image_1d, ac_image_2d, ac_image_3d, ac_image_cube, // includes cube arrays ac_image_1darray, ac_image_2darray, ac_image_2dmsaa, ac_image_2darraymsaa, }; +/* These cache policy bits match the definitions used by the LLVM intrinsics. */ +enum ac_image_cache_policy { + ac_glc = 1 << 0, + ac_slc = 1 << 1, +}; + struct ac_image_args { - enum ac_image_opcode opcode; - enum ac_image_dim dim; + enum ac_image_opcode opcode : 4; + enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */ + enum ac_image_dim dim : 3; + unsigned dmask : 4; + unsigned cache_policy : 2; + bool unorm : 1; + bool level_zero : 1; + unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */ LLVMValueRef resource; LLVMValueRef sampler; + LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */ LLVMValueRef offset; LLVMValueRef bias; LLVMValueRef compare; LLVMValueRef derivs[6]; LLVMValueRef coords[4]; LLVMValueRef lod; // also used by ac_image_get_resinfo - unsigned dmask; - bool unorm; - bool level_zero; }; LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a); LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h index d30f9da539e..dbe614d57c5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h @@ -43,21 +43,21 @@ struct lp_build_emit_data { * order of the arguments should be as follows: * SOA: s0.x, s0.y, s0.z, s0.w, s1.x, s1.y, s1.z, s1.w, s2.x, s2.y, s2.x, s2.w * AOS: s0.xyzw, s1.xyzw, s2.xyzw * TEXTURE Instructions: coord.xyzw * * Arguments should be packed into the args array. For example an SOA * instructions that reads s0.x and s1.x args should look like this: * args[0] = s0.x; * args[1] = s1.x; */ - LLVMValueRef args[18]; + LLVMValueRef args[20]; /** * Number of arguments in the args array. */ unsigned arg_count; /** * The type output type of the opcode. 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
index d30f9da539e..dbe614d57c5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
@@ -43,21 +43,21 @@ struct lp_build_emit_data {
     * order of the arguments should be as follows:
     * SOA: s0.x, s0.y, s0.z, s0.w, s1.x, s1.y, s1.z, s1.w, s2.x, s2.y, s2.x, s2.w
     * AOS: s0.xyzw, s1.xyzw, s2.xyzw
     * TEXTURE Instructions: coord.xyzw
     *
     * Arguments should be packed into the args array. For example an SOA
     * instructions that reads s0.x and s1.x args should look like this:
     * args[0] = s0.x;
     * args[1] = s1.x;
     */
-   LLVMValueRef args[18];
+   LLVMValueRef args[20];
 
    /**
     * Number of arguments in the args array.
    */
    unsigned arg_count;
 
    /**
    * The type output type of the opcode. This should be set in the
    * lp_build_tgsi_action::fetch_args function.
    */
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 1c653839aea..7eabcbabfdb 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -84,30 +84,20 @@ shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
 		index = si_get_indirect_index(ctx, &reg->Indirect, 1,
 					      reg->Register.Index);
 	}
 
 	if (ubo)
 		return ctx->abi.load_ubo(&ctx->abi, index);
 	else
 		return ctx->abi.load_ssbo(&ctx->abi, index, false);
 }
 
-static bool tgsi_is_array_image(unsigned target)
-{
-	return target == TGSI_TEXTURE_3D ||
-	       target == TGSI_TEXTURE_CUBE ||
-	       target == TGSI_TEXTURE_1D_ARRAY ||
-	       target == TGSI_TEXTURE_2D_ARRAY ||
-	       target == TGSI_TEXTURE_CUBE_ARRAY ||
-	       target == TGSI_TEXTURE_2D_ARRAY_MSAA;
-}
-
 static enum ac_image_dim
 ac_texture_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
 {
 	switch (target) {
 	case TGSI_TEXTURE_1D:
 	case TGSI_TEXTURE_SHADOW1D:
 		if (screen->info.chip_class >= GFX9)
 			return ac_image_2d;
 		return ac_image_1d;
 	case TGSI_TEXTURE_2D:
@@ -132,20 +122,42 @@ ac_texture_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type
 		return ac_image_2darray;
 	case TGSI_TEXTURE_2D_MSAA:
 		return ac_image_2dmsaa;
 	case TGSI_TEXTURE_2D_ARRAY_MSAA:
 		return ac_image_2darraymsaa;
 	default:
 		unreachable("unhandled texture type");
 	}
 }
 
+static enum ac_image_dim
+ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
+{
+	enum ac_image_dim dim = ac_texture_dim_from_tgsi_target(screen, target);
+
+	/* Match the resource type set in the descriptor. */
+	if (dim == ac_image_cube ||
+	    (screen->info.chip_class <= VI && dim == ac_image_3d))
+		dim = ac_image_2darray;
+	else if (target == TGSI_TEXTURE_2D && screen->info.chip_class >= GFX9) {
+		/* When a single layer of a 3D texture is bound, the shader
+		 * will refer to a 2D target, but the descriptor has a 3D type.
+		 * Since the HW ignores BASE_ARRAY in this case, we need to
+		 * send 3 coordinates. This doesn't hurt when the underlying
+		 * texture is non-3D.
+		 */
+		dim = ac_image_3d;
+	}
+
+	return dim;
+}
+
 /**
  * Given a 256-bit resource descriptor, force the DCC enable bit to off.
  *
  * At least on Tonga, executing image stores on images with DCC enabled and
  * non-trivial can eventually lead to lockups. This can occur when an
  * application binds an image as read-only but then uses a shader that writes
  * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
  * program termination) in this case, but it doesn't cost much to be a bit
  * nicer: disabling DCC in the shader still leads to undefined results but
  * avoids the lockup.
@@ -248,114 +260,62 @@ image_fetch_rsrc(
 		 */
 		index = LLVMBuildMul(ctx->ac.builder, index,
 				     LLVMConstInt(ctx->i32, 2, 0), "");
 	}
 
 	*rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
 				   target == TGSI_TEXTURE_BUFFER ?
 				   AC_DESC_BUFFER : AC_DESC_IMAGE, dcc_off);
 }
 
-static LLVMValueRef image_fetch_coords(
+static void image_fetch_coords(
 		struct lp_build_tgsi_context *bld_base,
 		const struct tgsi_full_instruction *inst,
-		unsigned src, LLVMValueRef desc)
+		unsigned src, LLVMValueRef desc,
+		LLVMValueRef *coords)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMBuilderRef builder = ctx->ac.builder;
 	unsigned target = inst->Memory.Texture;
-	unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
-	LLVMValueRef coords[4];
+	const unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
 	LLVMValueRef tmp;
 	int chan;
 
 	for (chan = 0; chan < num_coords; ++chan) {
 		tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
 		tmp = ac_to_integer(&ctx->ac, tmp);
 		coords[chan] = tmp;
 	}
 
 	if (ctx->screen->info.chip_class >= GFX9) {
 		/* 1D textures are allocated and used as 2D on GFX9. */
 		if (target == TGSI_TEXTURE_1D) {
 			coords[1] = ctx->i32_0;
-			num_coords++;
 		} else if (target == TGSI_TEXTURE_1D_ARRAY) {
 			coords[2] = coords[1];
 			coords[1] = ctx->i32_0;
-			num_coords++;
 		} else if (target == TGSI_TEXTURE_2D) {
 			/* The hw can't bind a slice of a 3D image as a 2D
 			 * image, because it ignores BASE_ARRAY if the target
 			 * is 3D. The workaround is to read BASE_ARRAY and set
 			 * it as the 3rd address operand for all 2D images.
 			 */
 			LLVMValueRef first_layer, const5, mask;
 
 			const5 = LLVMConstInt(ctx->i32, 5, 0);
 			mask = LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0);
 			first_layer = LLVMBuildExtractElement(builder, desc, const5, "");
 			first_layer = LLVMBuildAnd(builder, first_layer, mask, "");
 
 			coords[2] = first_layer;
-			num_coords++;
-		}
-	}
-
-	if (num_coords == 1)
-		return coords[0];
-
-	if (num_coords == 3) {
-		/* LLVM has difficulties lowering 3-element vectors. */
-		coords[3] = bld_base->uint_bld.undef;
-		num_coords = 4;
-	}
-
-	return lp_build_gather_values(&ctx->gallivm, coords, num_coords);
-}
-
-/**
- * Append the extra mode bits that are used by image load and store.
- */
-static void image_append_args(
-		struct si_shader_context *ctx,
-		struct lp_build_emit_data * emit_data,
-		unsigned target,
-		bool atomic,
-		bool force_glc)
-{
-	const struct tgsi_full_instruction *inst = emit_data->inst;
-	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
-	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
-	LLVMValueRef r128 = i1false;
-	LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false;
-	LLVMValueRef glc =
-		force_glc ||
-		inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
-		i1true : i1false;
-	LLVMValueRef slc = i1false;
-	LLVMValueRef lwe = i1false;
-
-	if (atomic) {
-		emit_data->args[emit_data->arg_count++] = r128;
-		emit_data->args[emit_data->arg_count++] = da;
-		if (!atomic) {
-			emit_data->args[emit_data->arg_count++] = glc;
 		}
-		emit_data->args[emit_data->arg_count++] = slc;
-		return;
 	}
-
-	emit_data->args[emit_data->arg_count++] = glc;
-	emit_data->args[emit_data->arg_count++] = slc;
-	emit_data->args[emit_data->arg_count++] = lwe;
-	emit_data->args[emit_data->arg_count++] = da;
 }
 
 /**
  * Append the resource and indexing arguments for buffer intrinsics.
  *
  * \param rsrc the v4i32 buffer resource
  * \param index index into the buffer (stride-based)
  * \param offset byte offset into the buffer
  */
 static void buffer_append_args(
@@ -402,35 +362,28 @@ static void load_fetch_args(
 		bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;
 		rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);
 
 		tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
 		offset = ac_to_integer(&ctx->ac, tmp);
 
 		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
 				   offset, false, false);
 	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
 		   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
-		LLVMValueRef coords;
-
 		image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);
-		coords = image_fetch_coords(bld_base, inst, 1, rsrc);
+		image_fetch_coords(bld_base, inst, 1, rsrc, &emit_data->args[1]);
 
 		if (target == TGSI_TEXTURE_BUFFER) {
-			buffer_append_args(ctx, emit_data, rsrc, coords,
+			buffer_append_args(ctx, emit_data, rsrc, emit_data->args[1],
 					   ctx->i32_0, false, false);
 		} else {
-			emit_data->args[0] = coords;
-			emit_data->args[1] = rsrc;
-			emit_data->args[2] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
-			emit_data->arg_count = 3;
-
-			image_append_args(ctx, emit_data, target, false, false);
+			emit_data->args[0] = rsrc;
 		}
 	}
 }
 
 static void load_emit_buffer(struct si_shader_context *ctx,
 			     struct lp_build_emit_data *emit_data,
 			     bool can_speculate, bool allow_smem)
 {
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	uint writemask = inst->Dst[0].Register.WriteMask;
@@ -565,24 +518,22 @@ static bool is_oneway_access_only(const struct tgsi_full_instruction *inst,
 	}
 	return false;
 }
 
 static void load_emit(
 	const struct lp_build_tgsi_action *action,
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMBuilderRef builder = ctx->ac.builder;
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
-	char intrinsic_name[64];
 	bool can_speculate = false;
 
 	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
 		load_emit_memory(ctx, emit_data);
 		return;
 	}
 
 	if (inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
 		load_emit_buffer(ctx, emit_data, true, true);
 		return;
@@ -609,31 +560,32 @@ static void load_emit(
 			ac_build_buffer_load_format(&ctx->ac,
 						    emit_data->args[0],
 						    emit_data->args[1],
 						    emit_data->args[2],
 						    num_channels,
 						    LLVMConstIntGetZExtValue(emit_data->args[3]),
 						    can_speculate);
 		emit_data->output[emit_data->chan] =
 			ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
 	} else {
-		ac_get_image_intr_name("llvm.amdgcn.image.load",
-				       emit_data->dst_type, /* vdata */
-				       LLVMTypeOf(emit_data->args[0]), /* coords */
-				       LLVMTypeOf(emit_data->args[1]), /* rsrc */
-				       intrinsic_name, sizeof(intrinsic_name));
+		struct ac_image_args args = {};
+		args.opcode = ac_image_load;
+		args.resource = emit_data->args[0];
+		memcpy(args.coords, &emit_data->args[1], sizeof(args.coords));
+		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
+		if (inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
+			args.cache_policy = ac_glc;
+		args.attributes = ac_get_load_intr_attribs(can_speculate);
+		args.dmask = 0xf;
 
 		emit_data->output[emit_data->chan] =
-			lp_build_intrinsic(
-				builder, intrinsic_name, emit_data->dst_type,
-				emit_data->args, emit_data->arg_count,
-				ac_get_load_intr_attribs(can_speculate));
+			ac_build_image_opcode(&ctx->ac, &args);
 	}
 }
 
 static void store_fetch_args(
 		struct lp_build_tgsi_context * bld_base,
 		struct lp_build_emit_data * emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	struct tgsi_full_src_register memory;
@@ -660,43 +612,37 @@ static void store_fetch_args(
 	rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false);
 
 	tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
 	offset = ac_to_integer(&ctx->ac, tmp);
 
 	buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
 			   offset, false, false);
 	} else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
 		   tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) {
 		unsigned target = inst->Memory.Texture;
-		LLVMValueRef coords;
 
 		/* 8bit/16bit TC L1 write corruption bug on SI.
 		 * All store opcodes not aligned to a dword are affected.
 		 *
 		 * The only way to get unaligned stores in radeonsi is through
 		 * shader images.
 		 */
 		bool force_glc = ctx->screen->info.chip_class == SI;
 
 		image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
-		coords = image_fetch_coords(bld_base, inst, 0, rsrc);
+		image_fetch_coords(bld_base, inst, 0, rsrc, &emit_data->args[2]);
 
 		if (target == TGSI_TEXTURE_BUFFER) {
-			buffer_append_args(ctx, emit_data, rsrc, coords,
+			buffer_append_args(ctx, emit_data, rsrc, emit_data->args[2],
 					   ctx->i32_0, false, force_glc);
 		} else {
-			emit_data->args[1] = coords;
-			emit_data->args[2] = rsrc;
-			emit_data->args[3] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
-			emit_data->arg_count = 4;
-
-			image_append_args(ctx, emit_data, target, false, force_glc);
+			emit_data->args[1] = rsrc;
 		}
 	}
 }
 
 static void store_emit_buffer(
 		struct si_shader_context *ctx,
 		struct lp_build_emit_data *emit_data,
 		bool writeonly_memory)
 {
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	uint writemask = inst->Dst[0].Register.WriteMask;
@@ -792,21 +738,20 @@ static void store_emit_memory(
 
 static void store_emit(
 	const struct lp_build_tgsi_action *action,
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMBuilderRef builder = ctx->ac.builder;
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
 	unsigned target = inst->Memory.Texture;
-	char intrinsic_name[64];
 	bool writeonly_memory = false;
 
 	if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
 		store_emit_memory(ctx, emit_data);
 		return;
 	}
 
 	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
 		ac_build_waitcnt(&ctx->ac, VM_CNT);
@@ -821,31 +766,39 @@ static void store_emit(
 		return;
 	}
 
 	if (target == TGSI_TEXTURE_BUFFER) {
 		emit_data->output[emit_data->chan] = lp_build_intrinsic(
 			builder, "llvm.amdgcn.buffer.store.format.v4f32",
 			emit_data->dst_type, emit_data->args,
 			emit_data->arg_count,
 			ac_get_store_intr_attribs(writeonly_memory));
 	} else {
-		ac_get_image_intr_name("llvm.amdgcn.image.store",
-				       LLVMTypeOf(emit_data->args[0]), /* vdata */
-				       LLVMTypeOf(emit_data->args[1]), /* coords */
-				       LLVMTypeOf(emit_data->args[2]), /* rsrc */
-				       intrinsic_name, sizeof(intrinsic_name));
+		struct ac_image_args args = {};
+		args.opcode = ac_image_store;
+		args.data[0] = emit_data->args[0];
+		args.resource = emit_data->args[1];
+		memcpy(args.coords, &emit_data->args[2], sizeof(args.coords));
+		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
+		args.attributes = ac_get_store_intr_attribs(writeonly_memory);
+		args.dmask = 0xf;
+
+		/* Workaround for 8bit/16bit TC L1 write corruption bug on SI.
+		 * All store opcodes not aligned to a dword are affected.
+		 */
+		bool force_glc = ctx->screen->info.chip_class == SI;
+		if (force_glc ||
+		    inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
+			args.cache_policy = ac_glc;
 
 		emit_data->output[emit_data->chan] =
-			lp_build_intrinsic(
-				builder, intrinsic_name, emit_data->dst_type,
-				emit_data->args, emit_data->arg_count,
-				ac_get_store_intr_attribs(writeonly_memory));
+			ac_build_image_opcode(&ctx->ac, &args);
 	}
 }
 
 static void atomic_fetch_args(
 		struct lp_build_tgsi_context * bld_base,
 		struct lp_build_emit_data * emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	LLVMValueRef data1, data2;
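Sketch, not patch content: the store path mirrors the load path, with the
source value in data[0] and the SI GLC workaround folded into cache_policy.
The wrapper and its parameters are placeholders:

/* Sketch only: image store via the unified helper, including the SI
 * TC L1 write-corruption workaround. */
static void
emit_image_store_sketch(struct si_shader_context *ctx, LLVMValueRef value,
			LLVMValueRef rsrc, LLVMValueRef coords[4],
			enum ac_image_dim dim, bool coherent,
			bool writeonly_memory)
{
	struct ac_image_args args = {};

	args.opcode = ac_image_store;
	args.data[0] = value;		/* v4f32 data to write */
	args.resource = rsrc;
	memcpy(args.coords, coords, sizeof(args.coords));
	args.dim = dim;
	args.dmask = 0xf;
	args.attributes = ac_get_store_intr_attribs(writeonly_memory);

	/* SI suffers 8/16-bit TC L1 write corruption for stores not
	 * aligned to a dword, so GLC is forced there. */
	if (ctx->screen->info.chip_class == SI || coherent)
		args.cache_policy = ac_glc;

	ac_build_image_opcode(&ctx->ac, &args);
}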
@@ -875,33 +828,31 @@ static void atomic_fetch_args(
 		rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
 
 		tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
 		offset = ac_to_integer(&ctx->ac, tmp);
 
 		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
 				   offset, true, false);
 	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
 		   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
 		unsigned target = inst->Memory.Texture;
-		LLVMValueRef coords;
 
 		image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
-		coords = image_fetch_coords(bld_base, inst, 1, rsrc);
+		image_fetch_coords(bld_base, inst, 1, rsrc,
+				   &emit_data->args[emit_data->arg_count + 1]);
 
 		if (target == TGSI_TEXTURE_BUFFER) {
-			buffer_append_args(ctx, emit_data, rsrc, coords,
+			buffer_append_args(ctx, emit_data, rsrc,
+					   emit_data->args[emit_data->arg_count + 1],
 					   ctx->i32_0, true, false);
 		} else {
-			emit_data->args[emit_data->arg_count++] = coords;
-			emit_data->args[emit_data->arg_count++] = rsrc;
-
-			image_append_args(ctx, emit_data, target, true, false);
+			emit_data->args[emit_data->arg_count] = rsrc;
 		}
 	}
 }
 
 static void atomic_emit_memory(struct si_shader_context *ctx,
 			       struct lp_build_emit_data *emit_data)
 {
 	LLVMBuilderRef builder = ctx->ac.builder;
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	LLVMValueRef ptr, result, arg;
@@ -966,51 +917,68 @@ static void atomic_emit_memory(struct si_shader_context *ctx,
 }
 
 static void atomic_emit(
 	const struct lp_build_tgsi_action *action,
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMBuilderRef builder = ctx->ac.builder;
 	const struct tgsi_full_instruction * inst = emit_data->inst;
-	char intrinsic_name[40];
 	LLVMValueRef tmp;
 
 	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
 		atomic_emit_memory(ctx, emit_data);
 		return;
 	}
 
 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
 	    inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
+		char intrinsic_name[40];
 		snprintf(intrinsic_name, sizeof(intrinsic_name),
 			 "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
+		tmp = lp_build_intrinsic(
+			builder, intrinsic_name, ctx->i32,
+			emit_data->args, emit_data->arg_count, 0);
+		emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
 	} else {
-		LLVMValueRef coords;
-		char coords_type[8];
+		unsigned num_data = inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS ?
+			2 : 1;
+		struct ac_image_args args = {};
 
-		if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
-			coords = emit_data->args[2];
-		else
-			coords = emit_data->args[1];
+		if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+			args.opcode = ac_image_atomic_cmpswap;
+		} else {
+			args.opcode = ac_image_atomic;
+			switch (inst->Instruction.Opcode) {
+			case TGSI_OPCODE_ATOMXCHG: args.atomic = ac_atomic_swap; break;
+			case TGSI_OPCODE_ATOMUADD: args.atomic = ac_atomic_add; break;
+			case TGSI_OPCODE_ATOMAND: args.atomic = ac_atomic_and; break;
+			case TGSI_OPCODE_ATOMOR: args.atomic = ac_atomic_or; break;
+			case TGSI_OPCODE_ATOMXOR: args.atomic = ac_atomic_xor; break;
+			case TGSI_OPCODE_ATOMUMIN: args.atomic = ac_atomic_umin; break;
+			case TGSI_OPCODE_ATOMUMAX: args.atomic = ac_atomic_umax; break;
+			case TGSI_OPCODE_ATOMIMIN: args.atomic = ac_atomic_smin; break;
+			case TGSI_OPCODE_ATOMIMAX: args.atomic = ac_atomic_smax; break;
+			default: unreachable("unhandled image atomic");
+			}
+		}
 
-		ac_build_type_name_for_intr(LLVMTypeOf(coords), coords_type, sizeof(coords_type));
-		snprintf(intrinsic_name, sizeof(intrinsic_name),
-			 "llvm.amdgcn.image.atomic.%s.%s",
-			 action->intr_name, coords_type);
-	}
+		for (unsigned i = 0; i < num_data; ++i)
+			args.data[i] = emit_data->args[i];
 
-	tmp = lp_build_intrinsic(
-		builder, intrinsic_name, ctx->i32,
-		emit_data->args, emit_data->arg_count, 0);
-	emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
+		args.resource = emit_data->args[num_data];
+		memcpy(args.coords, &emit_data->args[num_data + 1], sizeof(args.coords));
+		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
+
+		emit_data->output[emit_data->chan] =
+			ac_to_float(&ctx->ac, ac_build_image_opcode(&ctx->ac, &args));
+	}
 }
 
 static void set_tex_fetch_args(struct si_shader_context *ctx,
 			       struct lp_build_emit_data *emit_data,
 			       struct ac_image_args *args,
 			       unsigned target)
 {
 	args->dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
 	args->unorm = target == TGSI_TEXTURE_RECT ||
 		      target == TGSI_TEXTURE_SHADOWRECT;
@@ -1809,22 +1777,22 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 		assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN);
 
 		if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
 		    inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) {
 			gather4_int_result_workaround =
 				si_lower_gather4_integer(ctx, &args, target,
 							 inst->Texture.ReturnType);
 		}
 	}
 
-	LLVMValueRef result =
-		ac_build_image_opcode(&ctx->ac, &args);
+	args.attributes = AC_FUNC_ATTR_READNONE;
+	LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args);
 
 	if (gather4_int_result_workaround) {
 		result = si_fix_gather4_integer_result(ctx, result,
 						       inst->Texture.ReturnType,
 						       gather4_int_result_workaround);
 	}
 
 	emit_data->output[emit_data->chan] = result;
 }
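A final reviewer sketch, not patch content: putting the radeonsi side together,
a TGSI ATOMUADD on an image reduces to the following; value/rsrc/coords/dim are
placeholders for caller state:

/* Sketch only: image atomic add through the unified helper. */
static LLVMValueRef
emit_image_atomic_add_sketch(struct ac_llvm_context *ac, LLVMValueRef value,
			     LLVMValueRef rsrc, LLVMValueRef coords[4],
			     enum ac_image_dim dim)
{
	struct ac_image_args args = {};

	args.opcode = ac_image_atomic;
	args.atomic = ac_atomic_add;	/* from TGSI_OPCODE_ATOMUADD */
	args.data[0] = value;		/* i32 source operand */
	args.resource = rsrc;
	memcpy(args.coords, coords, sizeof(args.coords));
	args.dim = dim;

	/* Atomics return the pre-op value as i32; atomic_emit bitcasts
	 * it back to float for the TGSI register file. */
	return ac_build_image_opcode(ac, &args);
}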
-- 
2.14.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev