Re: [Mesa-dev] [PATCH 6/6] ac: use llvm.amdgcn.s.buffer.load

2019-01-14 Thread Marek Olšák
Yes, I'll remove this patch.

Marek

On Sun, Jan 13, 2019 at 10:17 AM Bas Nieuwenhuizen 
wrote:

> I think this is done with
>
> https://patchwork.freedesktop.org/series/55025/
>
> ?
>
> On Sat, Jan 12, 2019 at 12:53 AM Marek Olšák  wrote:
> >
> > From: Marek Olšák 
> >
> > ---
> >  src/amd/common/ac_llvm_build.c | 18 +++++++++++++-----
> >  .../drivers/radeonsi/si_shader_tgsi_mem.c  |  4 ++--
> >  2 files changed, 15 insertions(+), 7 deletions(-)
> >
> > diff --git a/src/amd/common/ac_llvm_build.c
> b/src/amd/common/ac_llvm_build.c
> > index 76047148a6a..c0d90ada2be 100644
> > --- a/src/amd/common/ac_llvm_build.c
> > +++ b/src/amd/common/ac_llvm_build.c
> > @@ -1234,25 +1234,33 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
> > if (allow_smem && !glc && !slc) {
> > assert(vindex == NULL);
> >
> > LLVMValueRef result[8];
> >
> > for (int i = 0; i < num_channels; i++) {
> > if (i) {
> > offset = LLVMBuildAdd(ctx->builder,
> offset,
> >
>  LLVMConstInt(ctx->i32, 4, 0), "");
> > }
> > -   LLVMValueRef args[2] = {rsrc, offset};
> > -   result[i] = ac_build_intrinsic(ctx,
> "llvm.SI.load.const.v4i32",
> > -  ctx->f32, args, 2,
> > -
> AC_FUNC_ATTR_READNONE |
> > -
> AC_FUNC_ATTR_LEGACY);
> > +
> > +   if (HAVE_LLVM >= 0x0800) {
> > +   LLVMValueRef args[3] = {rsrc, offset,
> ctx->i32_0};
> > +   result[i] = ac_build_intrinsic(ctx,
> "llvm.amdgcn.s.buffer.load.i32",
> > +  ctx->f32,
> args, 3,
> > +
> AC_FUNC_ATTR_READNONE);
> > +   } else {
> > +   LLVMValueRef args[2] = {rsrc, offset};
> > +   result[i] = ac_build_intrinsic(ctx,
> "llvm.SI.load.const.v4i32",
> > +  ctx->f32,
> args, 2,
> > +
> AC_FUNC_ATTR_READNONE |
> > +
> AC_FUNC_ATTR_LEGACY);
> > +   }
> > }
> > if (num_channels == 1)
> > return result[0];
> >
> > if (num_channels == 3)
> > result[num_channels++] = LLVMGetUndef(ctx->f32);
> > return ac_build_gather_values(ctx, result, num_channels);
> > }
> >
> > return ac_build_buffer_load_common(ctx, rsrc, vindex, offset,
> > diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> > index 727def56f65..2f49685c642 100644
> > --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> > +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> > @@ -533,24 +533,24 @@ static void load_emit(
> > info->images_store |
> > info->images_atomic,
> >
>  info->uses_bindless_buffer_store |
> >
>  info->uses_bindless_buffer_atomic,
> >
>  info->uses_bindless_image_store |
> >
>  info->uses_bindless_image_atomic);
> > args.cache_policy = get_cache_policy(ctx, inst, false, false,
> false);
> >
> > if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
> > /* Don't use SMEM for shader buffer loads, because LLVM
> doesn't
> > -* select SMEM for SI.load.const with a non-constant
> offset, and
> > +* select SMEM for amdgcn.s.buffer.load with a
> non-constant offset, and
> >  * constant offsets practically don't exist with shader
> buffers.
> >  *
> > -* Also, SI.load.const doesn't use inst_offset when it's
> lowered
> > +* Also, amdgcn.s.buffer.load doesn't use inst_offset
> when it's lowered
> >  * to VMEM, so we just end up with more VALU
> instructions in the end
> >  * and no benefit.
> >  *
> >  * TODO: Remove this line once LLVM can select SMEM with
> a non-constant
> >  *   offset, and can derive inst_offset when VMEM is
> selected.
> >  *   After that, si_memory_barrier should invalidate
> sL1 for shader
> >  *   buffers.
> >  */
> > emit_data->output[emit_data->chan] =
> > > ac_build_buffer_load(&ctx->ac, args.resource,
> > --
> > 2.17.1
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] ac: use llvm.amdgcn.s.buffer.load

2019-01-13 Thread Bas Nieuwenhuizen
I think this is done with

https://patchwork.freedesktop.org/series/55025/

?

On Sat, Jan 12, 2019 at 12:53 AM Marek Olšák  wrote:
>
> From: Marek Olšák 
>
> ---
>  src/amd/common/ac_llvm_build.c | 18 +++++++++++++-----
>  .../drivers/radeonsi/si_shader_tgsi_mem.c  |  4 ++--
>  2 files changed, 15 insertions(+), 7 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 76047148a6a..c0d90ada2be 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1234,25 +1234,33 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
> if (allow_smem && !glc && !slc) {
> assert(vindex == NULL);
>
> LLVMValueRef result[8];
>
> for (int i = 0; i < num_channels; i++) {
> if (i) {
> offset = LLVMBuildAdd(ctx->builder, offset,
>   LLVMConstInt(ctx->i32, 
> 4, 0), "");
> }
> -   LLVMValueRef args[2] = {rsrc, offset};
> -   result[i] = ac_build_intrinsic(ctx, 
> "llvm.SI.load.const.v4i32",
> -  ctx->f32, args, 2,
> -  AC_FUNC_ATTR_READNONE |
> -  AC_FUNC_ATTR_LEGACY);
> +
> +   if (HAVE_LLVM >= 0x0800) {
> +   LLVMValueRef args[3] = {rsrc, offset, 
> ctx->i32_0};
> +   result[i] = ac_build_intrinsic(ctx, 
> "llvm.amdgcn.s.buffer.load.i32",
> +  ctx->f32, 
> args, 3,
> +  
> AC_FUNC_ATTR_READNONE);
> +   } else {
> +   LLVMValueRef args[2] = {rsrc, offset};
> +   result[i] = ac_build_intrinsic(ctx, 
> "llvm.SI.load.const.v4i32",
> +  ctx->f32, 
> args, 2,
> +  
> AC_FUNC_ATTR_READNONE |
> +  
> AC_FUNC_ATTR_LEGACY);
> +   }
> }
> if (num_channels == 1)
> return result[0];
>
> if (num_channels == 3)
> result[num_channels++] = LLVMGetUndef(ctx->f32);
> return ac_build_gather_values(ctx, result, num_channels);
> }
>
> return ac_build_buffer_load_common(ctx, rsrc, vindex, offset,
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
> b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> index 727def56f65..2f49685c642 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> @@ -533,24 +533,24 @@ static void load_emit(
> info->images_store |
> info->images_atomic,
> 
> info->uses_bindless_buffer_store |
> 
> info->uses_bindless_buffer_atomic,
> 
> info->uses_bindless_image_store |
> 
> info->uses_bindless_image_atomic);
> args.cache_policy = get_cache_policy(ctx, inst, false, false, false);
>
> if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
> /* Don't use SMEM for shader buffer loads, because LLVM 
> doesn't
> -* select SMEM for SI.load.const with a non-constant offset, 
> and
> +* select SMEM for amdgcn.s.buffer.load with a non-constant 
> offset, and
>  * constant offsets practically don't exist with shader 
> buffers.
>  *
> -* Also, SI.load.const doesn't use inst_offset when it's 
> lowered
> +* Also, amdgcn.s.buffer.load doesn't use inst_offset when 
> it's lowered
>  * to VMEM, so we just end up with more VALU instructions in 
> the end
>  * and no benefit.
>  *
>  * TODO: Remove this line once LLVM can select SMEM with a 
> non-constant
>  *   offset, and can derive inst_offset when VMEM is 
> selected.
>  *   After that, si_memory_barrier should invalidate sL1 
> for shader
>  *   buffers.
>  */
> emit_data->output[emit_data->chan] =
> ac_build_buffer_load(&ctx->ac, args.resource,
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> 

[Mesa-dev] [PATCH 6/6] ac: use llvm.amdgcn.s.buffer.load

2019-01-11 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_llvm_build.c | 18 +++++++++++++-----
 .../drivers/radeonsi/si_shader_tgsi_mem.c  |  4 ++--
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 76047148a6a..c0d90ada2be 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1234,25 +1234,33 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
if (allow_smem && !glc && !slc) {
assert(vindex == NULL);
 
LLVMValueRef result[8];
 
for (int i = 0; i < num_channels; i++) {
if (i) {
offset = LLVMBuildAdd(ctx->builder, offset,
  LLVMConstInt(ctx->i32, 4, 
0), "");
}
-   LLVMValueRef args[2] = {rsrc, offset};
-   result[i] = ac_build_intrinsic(ctx, 
"llvm.SI.load.const.v4i32",
-  ctx->f32, args, 2,
-  AC_FUNC_ATTR_READNONE |
-  AC_FUNC_ATTR_LEGACY);
+
+   if (HAVE_LLVM >= 0x0800) {
+   LLVMValueRef args[3] = {rsrc, offset, 
ctx->i32_0};
+   result[i] = ac_build_intrinsic(ctx, 
"llvm.amdgcn.s.buffer.load.i32",
+  ctx->f32, args, 
3,
+  
AC_FUNC_ATTR_READNONE);
+   } else {
+   LLVMValueRef args[2] = {rsrc, offset};
+   result[i] = ac_build_intrinsic(ctx, 
"llvm.SI.load.const.v4i32",
+  ctx->f32, args, 
2,
+  
AC_FUNC_ATTR_READNONE |
+  
AC_FUNC_ATTR_LEGACY);
+   }
}
if (num_channels == 1)
return result[0];
 
if (num_channels == 3)
result[num_channels++] = LLVMGetUndef(ctx->f32);
return ac_build_gather_values(ctx, result, num_channels);
}
 
return ac_build_buffer_load_common(ctx, rsrc, vindex, offset,
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 727def56f65..2f49685c642 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -533,24 +533,24 @@ static void load_emit(
info->images_store |
info->images_atomic,

info->uses_bindless_buffer_store |

info->uses_bindless_buffer_atomic,
info->uses_bindless_image_store 
|

info->uses_bindless_image_atomic);
args.cache_policy = get_cache_policy(ctx, inst, false, false, false);
 
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
/* Don't use SMEM for shader buffer loads, because LLVM doesn't
-* select SMEM for SI.load.const with a non-constant offset, and
+* select SMEM for amdgcn.s.buffer.load with a non-constant 
offset, and
 * constant offsets practically don't exist with shader buffers.
 *
-* Also, SI.load.const doesn't use inst_offset when it's lowered
+* Also, amdgcn.s.buffer.load doesn't use inst_offset when it's 
lowered
 * to VMEM, so we just end up with more VALU instructions in 
the end
 * and no benefit.
 *
 * TODO: Remove this line once LLVM can select SMEM with a 
non-constant
 *   offset, and can derive inst_offset when VMEM is 
selected.
 *   After that, si_memory_barrier should invalidate sL1 
for shader
 *   buffers.
 */
emit_data->output[emit_data->chan] =
ac_build_buffer_load(&ctx->ac, args.resource,
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev