From: Marek Olšák <marek.ol...@amd.com>

---
 src/amd/common/ac_llvm_build.c                 | 18 +++++++++++++-----
 .../drivers/radeonsi/si_shader_tgsi_mem.c      |  4 ++--
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 76047148a6a..c0d90ada2be 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1234,25 +1234,33 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
        if (allow_smem && !glc && !slc) {
                assert(vindex == NULL);
 
                LLVMValueRef result[8];
 
                for (int i = 0; i < num_channels; i++) {
                        if (i) {
                                offset = LLVMBuildAdd(ctx->builder, offset,
                                                      LLVMConstInt(ctx->i32, 4, 0), "");
                        }
-                       LLVMValueRef args[2] = {rsrc, offset};
-                       result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32",
-                                                      ctx->f32, args, 2,
-                                                      AC_FUNC_ATTR_READNONE |
-                                                      AC_FUNC_ATTR_LEGACY);
+
+                       if (HAVE_LLVM >= 0x0800) {
+                               LLVMValueRef args[3] = {rsrc, offset, ctx->i32_0};
+                               result[i] = ac_build_intrinsic(ctx, "llvm.amdgcn.s.buffer.load.i32",
+                                                              ctx->f32, args, 3,
+                                                              AC_FUNC_ATTR_READNONE);
+                       } else {
+                               LLVMValueRef args[2] = {rsrc, offset};
+                               result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32",
+                                                              ctx->f32, args, 2,
+                                                              AC_FUNC_ATTR_READNONE |
+                                                              AC_FUNC_ATTR_LEGACY);
+                       }
                }
                if (num_channels == 1)
                        return result[0];
 
                if (num_channels == 3)
                        result[num_channels++] = LLVMGetUndef(ctx->f32);
                return ac_build_gather_values(ctx, result, num_channels);
        }
 
        return ac_build_buffer_load_common(ctx, rsrc, vindex, offset,
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 727def56f65..2f49685c642 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -533,24 +533,24 @@ static void load_emit(
                                                info->images_store |
                                                info->images_atomic,
                                                info->uses_bindless_buffer_store |
                                                info->uses_bindless_buffer_atomic,
                                                info->uses_bindless_image_store |
                                                info->uses_bindless_image_atomic);
        args.cache_policy = get_cache_policy(ctx, inst, false, false, false);
 
        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
                /* Don't use SMEM for shader buffer loads, because LLVM doesn't
-                * select SMEM for SI.load.const with a non-constant offset, and
+                * select SMEM for amdgcn.s.buffer.load with a non-constant offset, and
                 * constant offsets practically don't exist with shader buffers.
                 *
-                * Also, SI.load.const doesn't use inst_offset when it's lowered
+                * Also, amdgcn.s.buffer.load doesn't use inst_offset when it's lowered
                 * to VMEM, so we just end up with more VALU instructions in the end
                 * and no benefit.
                 *
                 * TODO: Remove this line once LLVM can select SMEM with a non-constant
                 *       offset, and can derive inst_offset when VMEM is selected.
                 *       After that, si_memory_barrier should invalidate sL1 for shader
                 *       buffers.
                 */
                emit_data->output[emit_data->chan] =
                        ac_build_buffer_load(&ctx->ac, args.resource,
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to