Re: [Mesa-dev] [PATCH 6/6] ac: use llvm.amdgcn.s.buffer.load
Yes, I'll remove this patch. Marek On Sun, Jan 13, 2019 at 10:17 AM Bas Nieuwenhuizen wrote: > I think this is done with > > https://patchwork.freedesktop.org/series/55025/ > > ? > > On Sat, Jan 12, 2019 at 12:53 AM Marek Olšák wrote: > > > > From: Marek Olšák > > > > --- > > src/amd/common/ac_llvm_build.c | 18 +- > > .../drivers/radeonsi/si_shader_tgsi_mem.c | 4 ++-- > > 2 files changed, 15 insertions(+), 7 deletions(-) > > > > diff --git a/src/amd/common/ac_llvm_build.c > b/src/amd/common/ac_llvm_build.c > > index 76047148a6a..c0d90ada2be 100644 > > --- a/src/amd/common/ac_llvm_build.c > > +++ b/src/amd/common/ac_llvm_build.c > > @@ -1234,25 +1234,33 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, > > if (allow_smem && !glc && !slc) { > > assert(vindex == NULL); > > > > LLVMValueRef result[8]; > > > > for (int i = 0; i < num_channels; i++) { > > if (i) { > > offset = LLVMBuildAdd(ctx->builder, > offset, > > > LLVMConstInt(ctx->i32, 4, 0), ""); > > } > > - LLVMValueRef args[2] = {rsrc, offset}; > > - result[i] = ac_build_intrinsic(ctx, > "llvm.SI.load.const.v4i32", > > - ctx->f32, args, 2, > > - > AC_FUNC_ATTR_READNONE | > > - > AC_FUNC_ATTR_LEGACY); > > + > > + if (HAVE_LLVM >= 0x0800) { > > + LLVMValueRef args[3] = {rsrc, offset, > ctx->i32_0}; > > + result[i] = ac_build_intrinsic(ctx, > "llvm.amdgcn.s.buffer.load.i32", > > + ctx->f32, > args, 3, > > + > AC_FUNC_ATTR_READNONE); > > + } else { > > + LLVMValueRef args[2] = {rsrc, offset}; > > + result[i] = ac_build_intrinsic(ctx, > "llvm.SI.load.const.v4i32", > > + ctx->f32, > args, 2, > > + > AC_FUNC_ATTR_READNONE | > > + > AC_FUNC_ATTR_LEGACY); > > + } > > } > > if (num_channels == 1) > > return result[0]; > > > > if (num_channels == 3) > > result[num_channels++] = LLVMGetUndef(ctx->f32); > > return ac_build_gather_values(ctx, result, num_channels); > > } > > > > return ac_build_buffer_load_common(ctx, rsrc, vindex, offset, > > diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > > index 727def56f65..2f49685c642 100644 > > --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > > +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > > @@ -533,24 +533,24 @@ static void load_emit( > > info->images_store | > > info->images_atomic, > > > info->uses_bindless_buffer_store | > > > info->uses_bindless_buffer_atomic, > > > info->uses_bindless_image_store | > > > info->uses_bindless_image_atomic); > > args.cache_policy = get_cache_policy(ctx, inst, false, false, > false); > > > > if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { > > /* Don't use SMEM for shader buffer loads, because LLVM > doesn't > > -* select SMEM for SI.load.const with a non-constant > offset, and > > +* select SMEM for amdgcn.s.buffer.load with a > non-constant offset, and > > * constant offsets practically don't exist with shader > buffers. > > * > > -* Also, SI.load.const doesn't use inst_offset when it's > lowered > > +* Also, amdgcn.s.buffer.load doesn't use inst_offset > when it's lowered > > * to VMEM, so we just end up with more VALU > instructions in the end > > * and no benefit. > > * > > * TODO: Remove this line once LLVM can select SMEM with > a non-constant > > * offset, and can derive inst_offset when VMEM is > selected. > > * After that, si_memory_barrier should invalidate > sL1 for shader > > * buffers. > > */ > > emit_data->output[emit_data->chan] = > > ac_build_buffer_load(&ctx->ac, args.resource, > > -- > > 2.17.1 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/6] ac: use llvm.amdgcn.s.buffer.load
I think this is done with https://patchwork.freedesktop.org/series/55025/ ? On Sat, Jan 12, 2019 at 12:53 AM Marek Olšák wrote: > > From: Marek Olšák > > --- > src/amd/common/ac_llvm_build.c | 18 +- > .../drivers/radeonsi/si_shader_tgsi_mem.c | 4 ++-- > 2 files changed, 15 insertions(+), 7 deletions(-) > > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c > index 76047148a6a..c0d90ada2be 100644 > --- a/src/amd/common/ac_llvm_build.c > +++ b/src/amd/common/ac_llvm_build.c > @@ -1234,25 +1234,33 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, > if (allow_smem && !glc && !slc) { > assert(vindex == NULL); > > LLVMValueRef result[8]; > > for (int i = 0; i < num_channels; i++) { > if (i) { > offset = LLVMBuildAdd(ctx->builder, offset, > LLVMConstInt(ctx->i32, > 4, 0), ""); > } > - LLVMValueRef args[2] = {rsrc, offset}; > - result[i] = ac_build_intrinsic(ctx, > "llvm.SI.load.const.v4i32", > - ctx->f32, args, 2, > - AC_FUNC_ATTR_READNONE | > - AC_FUNC_ATTR_LEGACY); > + > + if (HAVE_LLVM >= 0x0800) { > + LLVMValueRef args[3] = {rsrc, offset, > ctx->i32_0}; > + result[i] = ac_build_intrinsic(ctx, > "llvm.amdgcn.s.buffer.load.i32", > + ctx->f32, > args, 3, > + > AC_FUNC_ATTR_READNONE); > + } else { > + LLVMValueRef args[2] = {rsrc, offset}; > + result[i] = ac_build_intrinsic(ctx, > "llvm.SI.load.const.v4i32", > + ctx->f32, > args, 2, > + > AC_FUNC_ATTR_READNONE | > + > AC_FUNC_ATTR_LEGACY); > + } > } > if (num_channels == 1) > return result[0]; > > if (num_channels == 3) > result[num_channels++] = LLVMGetUndef(ctx->f32); > return ac_build_gather_values(ctx, result, num_channels); > } > > return ac_build_buffer_load_common(ctx, rsrc, vindex, offset, > diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > index 727def56f65..2f49685c642 100644 > --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > @@ -533,24 +533,24 @@ 
static void load_emit( > info->images_store | > info->images_atomic, > > info->uses_bindless_buffer_store | > > info->uses_bindless_buffer_atomic, > > info->uses_bindless_image_store | > > info->uses_bindless_image_atomic); > args.cache_policy = get_cache_policy(ctx, inst, false, false, false); > > if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { > /* Don't use SMEM for shader buffer loads, because LLVM > doesn't > -* select SMEM for SI.load.const with a non-constant offset, > and > +* select SMEM for amdgcn.s.buffer.load with a non-constant > offset, and > * constant offsets practically don't exist with shader > buffers. > * > -* Also, SI.load.const doesn't use inst_offset when it's > lowered > +* Also, amdgcn.s.buffer.load doesn't use inst_offset when > it's lowered > * to VMEM, so we just end up with more VALU instructions in > the end > * and no benefit. > * > * TODO: Remove this line once LLVM can select SMEM with a > non-constant > * offset, and can derive inst_offset when VMEM is > selected. > * After that, si_memory_barrier should invalidate sL1 > for shader > * buffers. > */ > emit_data->output[emit_data->chan] = > ac_build_buffer_load(&ctx->ac, args.resource, > -- > 2.17.1 > > ___ > mesa-dev mailing list >
[Mesa-dev] [PATCH 6/6] ac: use llvm.amdgcn.s.buffer.load
From: Marek Olšák --- src/amd/common/ac_llvm_build.c | 18 +- .../drivers/radeonsi/si_shader_tgsi_mem.c | 4 ++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 76047148a6a..c0d90ada2be 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1234,25 +1234,33 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, if (allow_smem && !glc && !slc) { assert(vindex == NULL); LLVMValueRef result[8]; for (int i = 0; i < num_channels; i++) { if (i) { offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, 4, 0), ""); } - LLVMValueRef args[2] = {rsrc, offset}; - result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32", - ctx->f32, args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_LEGACY); + + if (HAVE_LLVM >= 0x0800) { + LLVMValueRef args[3] = {rsrc, offset, ctx->i32_0}; + result[i] = ac_build_intrinsic(ctx, "llvm.amdgcn.s.buffer.load.i32", + ctx->f32, args, 3, + AC_FUNC_ATTR_READNONE); + } else { + LLVMValueRef args[2] = {rsrc, offset}; + result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32", + ctx->f32, args, 2, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); + } } if (num_channels == 1) return result[0]; if (num_channels == 3) result[num_channels++] = LLVMGetUndef(ctx->f32); return ac_build_gather_values(ctx, result, num_channels); } return ac_build_buffer_load_common(ctx, rsrc, vindex, offset, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 727def56f65..2f49685c642 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -533,24 +533,24 @@ static void load_emit( info->images_store | info->images_atomic, info->uses_bindless_buffer_store | info->uses_bindless_buffer_atomic, info->uses_bindless_image_store | info->uses_bindless_image_atomic); args.cache_policy = get_cache_policy(ctx, inst, false, 
false, false); if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { /* Don't use SMEM for shader buffer loads, because LLVM doesn't -* select SMEM for SI.load.const with a non-constant offset, and +* select SMEM for amdgcn.s.buffer.load with a non-constant offset, and * constant offsets practically don't exist with shader buffers. * -* Also, SI.load.const doesn't use inst_offset when it's lowered +* Also, amdgcn.s.buffer.load doesn't use inst_offset when it's lowered * to VMEM, so we just end up with more VALU instructions in the end * and no benefit. * * TODO: Remove this line once LLVM can select SMEM with a non-constant * offset, and can derive inst_offset when VMEM is selected. * After that, si_memory_barrier should invalidate sL1 for shader * buffers. */ emit_data->output[emit_data->chan] = ac_build_buffer_load(&ctx->ac, args.resource, -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev