On Fri, Nov 16, 2018 at 9:34 AM Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > > > On 11/12/18 10:51 PM, Bas Nieuwenhuizen wrote: > > These force the index to be used in the instruction so we don't need the > > workaround. > > > > Totals: > > SGPRS: 1321642 -> 1321802 (0.01 %) > > VGPRS: 943664 -> 943788 (0.01 %) > > Spilled SGPRs: 28468 -> 28480 (0.04 %) > > Spilled VGPRs: 88 -> 89 (1.14 %) > > Private memory VGPRs: 0 -> 0 (0.00 %) > > Scratch size: 80 -> 80 (0.00 %) dwords per thread > > Code Size: 52415292 -> 52338932 (-0.15 %) bytes > > LDS: 400 -> 400 (0.00 %) blocks > > Max Waves: 233903 -> 233803 (-0.04 %) > > Wait states: 0 -> 0 (0.00 %) > > > > Totals from affected shaders: > > SGPRS: 238344 -> 238504 (0.07 %) > > VGPRS: 232732 -> 232856 (0.05 %) > > Spilled SGPRs: 13125 -> 13137 (0.09 %) > > Spilled VGPRs: 88 -> 89 (1.14 %) > > Private memory VGPRs: 0 -> 0 (0.00 %) > > Scratch size: 80 -> 80 (0.00 %) dwords per thread > > Code Size: 15752712 -> 15676352 (-0.48 %) bytes > > LDS: 139 -> 139 (0.00 %) blocks > > Max Waves: 31680 -> 31580 (-0.32 %) > > Wait states: 0 -> 0 (0.00 %) > > --- > > src/amd/common/ac_llvm_build.c | 52 +++++++++++++++++++++++++++++++ > > src/amd/common/ac_nir_to_llvm.c | 29 ++++++++++++----- > > src/amd/vulkan/radv_nir_to_llvm.c | 2 +- > > 3 files changed, 75 insertions(+), 8 deletions(-) > > > > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c > > index 1392ec0f238..22245aadba1 100644 > > --- a/src/amd/common/ac_llvm_build.c > > +++ b/src/amd/common/ac_llvm_build.c > > @@ -1161,6 +1161,47 @@ ac_build_buffer_load_common(struct ac_llvm_context > > *ctx, > > ac_get_load_intr_attribs(can_speculate)); > > } > > > > +static LLVMValueRef > > +ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx, > > + LLVMValueRef rsrc, > > + LLVMValueRef vindex, > > + LLVMValueRef voffset, > > + LLVMValueRef soffset, > > + unsigned num_channels, > > + bool glc, > > + bool slc, > > + bool can_speculate, > > + 
bool use_format, > > + bool structurized) > > +{ > > + LLVMValueRef args[5]; > > + int idx = 0; > > + args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); > > + if (structurized) > > + args[idx++] = vindex ? vindex : ctx->i32_0; > > + args[idx++] = voffset ? voffset : ctx->i32_0; > > + args[idx++] = soffset ? soffset : ctx->i32_0; > > + args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), > > 0); > > + unsigned func = CLAMP(num_channels, 1, 3) - 1; > > + > > + LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32}; > > + const char *type_names[] = {"f32", "v2f32", "v4f32"}; > > + const char *indexing_kind = structurized ? "struct" : "raw"; > > + char name[256]; > > + > > + if (use_format) { > > + snprintf(name, sizeof(name), > > "llvm.amdgcn.%s.buffer.load.format.%s", > > + indexing_kind, type_names[func]); > > + } else { > > + snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s", > > + indexing_kind, type_names[func]); > > + } > > + > > + return ac_build_intrinsic(ctx, name, types[func], args, > > + idx, > > + ac_get_load_intr_attribs(can_speculate)); > > +} > > + > > LLVMValueRef > > ac_build_buffer_load(struct ac_llvm_context *ctx, > > LLVMValueRef rsrc, > > @@ -1218,6 +1259,11 @@ LLVMValueRef ac_build_buffer_load_format(struct > > ac_llvm_context *ctx, > > bool glc, > > bool can_speculate) > > { > > + if (HAVE_LLVM >= 0x800) { > > + return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, > > voffset, ctx->i32_0, > > + num_channels, glc, > > false, > > + can_speculate, true, > > true); > > + } > > return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, > > num_channels, glc, false, > > can_speculate, true); > > @@ -1231,6 +1277,12 @@ LLVMValueRef > > ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx, > > bool glc, > > bool can_speculate) > > { > > + if (HAVE_LLVM >= 0x800) { > > + return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, > > voffset, ctx->i32_0, > > + num_channels, glc, > > 
false, > > + can_speculate, true, > > true); > > + } > > + > > LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, > > LLVMConstInt(ctx->i32, 2, 0), ""); > > LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, > > ctx->i32_1, ""); > > stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, > > 16, 0), ""); > > diff --git a/src/amd/common/ac_nir_to_llvm.c > > b/src/amd/common/ac_nir_to_llvm.c > > index c950b81dca2..a19e66fe2a0 100644 > > --- a/src/amd/common/ac_nir_to_llvm.c > > +++ b/src/amd/common/ac_nir_to_llvm.c > > @@ -2387,10 +2387,17 @@ static void visit_image_store(struct ac_nir_context > > *ctx, > > params[2] = LLVMBuildExtractElement(ctx->ac.builder, > > get_src(ctx, instr->src[1]), > > ctx->ac.i32_0, ""); /* > > vindex */ > > params[3] = ctx->ac.i32_0; /* voffset */ > > - params[4] = glc; /* glc */ > > - params[5] = ctx->ac.i1false; /* slc */ > > - ac_build_intrinsic(&ctx->ac, > > "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt, > > - params, 6, 0); > > + if (HAVE_LLVM >= 0x800) { > > + params[4] = ctx->ac.i32_0; /* soffset */ > > + params[5] = glc ? ctx->ac.i32_1 : ctx->ac.i32_0; > > + ac_build_intrinsic(&ctx->ac, > > "llvm.amdgcn.struct.buffer.store.format.v4f32", ctx->ac.voidt, > > + params, 6, 0); > > No SLC for this one?
GLC and SLC are combined into a single argument as a bitfield (1 = glc, 2 = slc); slc is not mentioned here because it is always false. > > > + } else { > > + params[4] = glc; /* glc */ > > + params[5] = ctx->ac.i1false; /* slc */ > > + ac_build_intrinsic(&ctx->ac, > > "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt, > > + params, 6, 0); > > + } > > } else { > > struct ac_image_args args = {}; > > args.opcode = ac_image_store; > > @@ -2470,10 +2477,18 @@ static LLVMValueRef visit_image_atomic(struct > > ac_nir_context *ctx, > > params[param_count++] = > > LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), > > > > ctx->ac.i32_0, ""); /* vindex */ > > params[param_count++] = ctx->ac.i32_0; /* voffset */ > > - params[param_count++] = ctx->ac.i1false; /* slc */ > > + if (HAVE_LLVM >= 0x800) { > > + params[param_count++] = ctx->ac.i32_0; /* soffset */ > > + params[param_count++] = ctx->ac.i32_0; /* slc */ > > > > - length = snprintf(intrinsic_name, sizeof(intrinsic_name), > > - "llvm.amdgcn.buffer.atomic.%s", > > atomic_name); > > + length = snprintf(intrinsic_name, > > sizeof(intrinsic_name), > > + > > "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name); > > + } else { > > + params[param_count++] = ctx->ac.i1false; /* slc */ > > + > > + length = snprintf(intrinsic_name, > > sizeof(intrinsic_name), > > + "llvm.amdgcn.buffer.atomic.%s", > > atomic_name); > > + } > > > > assert(length < sizeof(intrinsic_name)); > > return ac_build_intrinsic(&ctx->ac, intrinsic_name, > > ctx->ac.i32, > > diff --git a/src/amd/vulkan/radv_nir_to_llvm.c > > b/src/amd/vulkan/radv_nir_to_llvm.c > > index f56eb01dc52..2e6f88ac342 100644 > > --- a/src/amd/vulkan/radv_nir_to_llvm.c > > +++ b/src/amd/vulkan/radv_nir_to_llvm.c > > @@ -3500,7 +3500,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct > > ac_llvm_compiler *ac_llvm, > > ctx.abi.load_sampler_desc = radv_get_sampler_desc; > > ctx.abi.load_resource = radv_load_resource; > > ctx.abi.clamp_shadow_reference = false; > > - 
ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9; > > + ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && > > HAVE_LLVM < 0x800; > > > > if (shader_count >= 2) > > ac_init_exec_full_mask(&ctx.ac); > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev