From: Marek Olšák <[email protected]>
---
src/amd/common/ac_llvm_build.c | 18 +++++++++++++-----
.../drivers/radeonsi/si_shader_tgsi_mem.c | 4 ++--
2 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 76047148a6a..c0d90ada2be 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1234,25 +1234,33 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
if (allow_smem && !glc && !slc) {
assert(vindex == NULL);
LLVMValueRef result[8];
for (int i = 0; i < num_channels; i++) {
if (i) {
offset = LLVMBuildAdd(ctx->builder, offset,
LLVMConstInt(ctx->i32, 4,
0), "");
}
- LLVMValueRef args[2] = {rsrc, offset};
- result[i] = ac_build_intrinsic(ctx,
"llvm.SI.load.const.v4i32",
- ctx->f32, args, 2,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_LEGACY);
+
+ if (HAVE_LLVM >= 0x0800) {
+ LLVMValueRef args[3] = {rsrc, offset,
ctx->i32_0};
+ result[i] = ac_build_intrinsic(ctx,
"llvm.amdgcn.s.buffer.load.i32",
+ ctx->f32, args,
3,
+
AC_FUNC_ATTR_READNONE);
+ } else {
+ LLVMValueRef args[2] = {rsrc, offset};
+ result[i] = ac_build_intrinsic(ctx,
"llvm.SI.load.const.v4i32",
+ ctx->f32, args,
2,
+
AC_FUNC_ATTR_READNONE |
+
AC_FUNC_ATTR_LEGACY);
+ }
}
if (num_channels == 1)
return result[0];
if (num_channels == 3)
result[num_channels++] = LLVMGetUndef(ctx->f32);
return ac_build_gather_values(ctx, result, num_channels);
}
return ac_build_buffer_load_common(ctx, rsrc, vindex, offset,
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 727def56f65..2f49685c642 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -533,24 +533,24 @@ static void load_emit(
info->images_store |
info->images_atomic,
info->uses_bindless_buffer_store |
info->uses_bindless_buffer_atomic,
info->uses_bindless_image_store
|
info->uses_bindless_image_atomic);
args.cache_policy = get_cache_policy(ctx, inst, false, false, false);
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
/* Don't use SMEM for shader buffer loads, because LLVM doesn't
- * select SMEM for SI.load.const with a non-constant offset, and
+ * select SMEM for amdgcn.s.buffer.load with a non-constant offset, and
* constant offsets practically don't exist with shader buffers.
*
- * Also, SI.load.const doesn't use inst_offset when it's lowered
+ * Also, amdgcn.s.buffer.load doesn't use inst_offset when it's lowered
* to VMEM, so we just end up with more VALU instructions in the end
* and no benefit.
*
* TODO: Remove this line once LLVM can select SMEM with a non-constant
* offset, and can derive inst_offset when VMEM is selected.
* After that, si_memory_barrier should invalidate sL1 for shader
* buffers.
*/
emit_data->output[emit_data->chan] =
ac_build_buffer_load(&ctx->ac, args.resource,
--
2.17.1
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev