Re: [Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.

2017-08-17 Thread Marek Olšák
On Thu, Aug 17, 2017 at 2:46 PM, Andres Gomez  wrote:
> Hi Dave,
>
> this patch has been collected for 17.2.0-rc3 but, for 17.1.x, the
> conflicts are not so trivial and the landed commit was tagged for
> stable without specifying the actual branch.
>
> Therefore, unless you intended this also for 17.1.x and would like to
> provide a backport, I'm more keen on dropping it for that branch.
>
> WDYT?

Let's drop it. It's not really stable material.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.

2017-08-17 Thread Andres Gomez
Hi Dave,

this patch has been collected for 17.2.0-rc3 but, for 17.1.x, the
conflicts are not so trivial and the landed commit was tagged for
stable without specifying the actual branch.

Therefore, unless you intended this also for 17.1.x and would like to
provide a backport, I'm more keen on dropping it for that branch.

WDYT?

Br.

On Tue, 2017-08-01 at 14:14 +1000, Dave Airlie wrote:
> From: Dave Airlie 
> 
> This looks like it's supported since llvm 3.9 at least,
> so switch over radeonsi and radv to using it, -pro also
> uses this. We can now drop creating lds for these operations
> as the ds_swizzle operation doesn't actually write to lds at all.
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/common/ac_llvm_build.c   | 57 +++-
>  src/amd/common/ac_llvm_build.h   |  1 -
>  src/amd/common/ac_nir_to_llvm.c  |  9 +
>  src/gallium/drivers/radeonsi/si_shader.c | 16 +
>  4 files changed, 44 insertions(+), 39 deletions(-)
> 
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 9b939c1..a38aad6 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -796,21 +796,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
> bool has_ds_bpermute,
> uint32_t mask,
> int idx,
> -   LLVMValueRef lds,
> LLVMValueRef val)
>  {
> - LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
> + LLVMValueRef tl, trbl, args[2];
>   LLVMValueRef result;
>  
> - thread_id = ac_get_thread_id(ctx);
> + if (has_ds_bpermute) {
> + LLVMValueRef thread_id, tl_tid, trbl_tid;
> + thread_id = ac_get_thread_id(ctx);
>  
> - tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
> -   LLVMConstInt(ctx->i32, mask, false), "");
> + tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
> +   LLVMConstInt(ctx->i32, mask, false), "");
>  
> - trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
> - LLVMConstInt(ctx->i32, idx, false), "");
> + trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
> + LLVMConstInt(ctx->i32, idx, false), "");
>  
> - if (has_ds_bpermute) {
>   args[0] = LLVMBuildMul(ctx->builder, tl_tid,
>  LLVMConstInt(ctx->i32, 4, false), "");
>   args[1] = val;
> @@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
> AC_FUNC_ATTR_READNONE |
> AC_FUNC_ATTR_CONVERGENT);
>   } else {
> - LLVMValueRef store_ptr, load_ptr0, load_ptr1;
> + uint32_t masks[2];
> +
> + switch (mask) {
> + case AC_TID_MASK_TOP_LEFT:
> + masks[0] = 0x8000;
> + if (idx == 1)
> + masks[1] = 0x8055;
> + else
> + masks[1] = 0x80aa;
> +
> + break;
> + case AC_TID_MASK_TOP:
> + masks[0] = 0x8044;
> + masks[1] = 0x80ee;
> + break;
> + case AC_TID_MASK_LEFT:
> + masks[0] = 0x80a0;
> + masks[1] = 0x80f5;
> + break;
> + }
>  
> - store_ptr = ac_build_gep0(ctx, lds, thread_id);
> - load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
> - load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
> + args[0] = val;
> + args[1] = LLVMConstInt(ctx->i32, masks[0], false);
>  
> - LLVMBuildStore(ctx->builder, val, store_ptr);
> - tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
> - trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
> + tl = ac_build_intrinsic(ctx,
> + "llvm.amdgcn.ds.swizzle", ctx->i32,
> + args, 2,
> + AC_FUNC_ATTR_READNONE |
> + AC_FUNC_ATTR_CONVERGENT);
> +
> + args[1] = LLVMConstInt(ctx->i32, masks[1], false);
> + trbl = ac_build_intrinsic(ctx,
> + "llvm.amdgcn.ds.swizzle", ctx->i32,
> + args, 2,
> + AC_FUNC_ATTR_READNONE |
> + AC_FUNC_ATTR_CONVERGENT);
>   }
>  
>   tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 09fd585..ee27d3c 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context 

Re: [Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.

2017-08-01 Thread Marek Olšák
Tested on SI.

Acked-by: Marek Olšák 

Marek

On Tue, Aug 1, 2017 at 6:14 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This looks like it's supported since llvm 3.9 at least,
> so switch over radeonsi and radv to using it, -pro also
> uses this. We can now drop creating lds for these operations
> as the ds_swizzle operation doesn't actually write to lds at all.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/common/ac_llvm_build.c   | 57 +++-
>  src/amd/common/ac_llvm_build.h   |  1 -
>  src/amd/common/ac_nir_to_llvm.c  |  9 +
>  src/gallium/drivers/radeonsi/si_shader.c | 16 +
>  4 files changed, 44 insertions(+), 39 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 9b939c1..a38aad6 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -796,21 +796,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
>   bool has_ds_bpermute,
>   uint32_t mask,
>   int idx,
> - LLVMValueRef lds,
>   LLVMValueRef val)
>  {
> -   LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
> +   LLVMValueRef tl, trbl, args[2];
> LLVMValueRef result;
>
> -   thread_id = ac_get_thread_id(ctx);
> +   if (has_ds_bpermute) {
> +   LLVMValueRef thread_id, tl_tid, trbl_tid;
> +   thread_id = ac_get_thread_id(ctx);
>
> -   tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
> - LLVMConstInt(ctx->i32, mask, false), "");
> +   tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
> + LLVMConstInt(ctx->i32, mask, false), "");
>
> -   trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
> -   LLVMConstInt(ctx->i32, idx, false), "");
> +   trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
> +   LLVMConstInt(ctx->i32, idx, false), "");
>
> -   if (has_ds_bpermute) {
> args[0] = LLVMBuildMul(ctx->builder, tl_tid,
>LLVMConstInt(ctx->i32, 4, false), "");
> args[1] = val;
> @@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
>   AC_FUNC_ATTR_READNONE |
>   AC_FUNC_ATTR_CONVERGENT);
> } else {
> -   LLVMValueRef store_ptr, load_ptr0, load_ptr1;
> +   uint32_t masks[2];
> +
> +   switch (mask) {
> +   case AC_TID_MASK_TOP_LEFT:
> +   masks[0] = 0x8000;
> +   if (idx == 1)
> +   masks[1] = 0x8055;
> +   else
> +   masks[1] = 0x80aa;
> +
> +   break;
> +   case AC_TID_MASK_TOP:
> +   masks[0] = 0x8044;
> +   masks[1] = 0x80ee;
> +   break;
> +   case AC_TID_MASK_LEFT:
> +   masks[0] = 0x80a0;
> +   masks[1] = 0x80f5;
> +   break;
> +   }
>
> -   store_ptr = ac_build_gep0(ctx, lds, thread_id);
> -   load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
> -   load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
> +   args[0] = val;
> +   args[1] = LLVMConstInt(ctx->i32, masks[0], false);
>
> -   LLVMBuildStore(ctx->builder, val, store_ptr);
> -   tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
> -   trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
> +   tl = ac_build_intrinsic(ctx,
> +   "llvm.amdgcn.ds.swizzle", ctx->i32,
> +   args, 2,
> +   AC_FUNC_ATTR_READNONE |
> +   AC_FUNC_ATTR_CONVERGENT);
> +
> +   args[1] = LLVMConstInt(ctx->i32, masks[1], false);
> +   trbl = ac_build_intrinsic(ctx,
> +   "llvm.amdgcn.ds.swizzle", ctx->i32,
> +   args, 2,
> +   AC_FUNC_ATTR_READNONE |
> +   AC_FUNC_ATTR_CONVERGENT);
> }
>
> tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 09fd585..ee27d3c 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
>   bool has_ds_bpermute,
>   uint32_t mask,
>   int idx,
> - LLVMValueRef lds,

Re: [Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.

2017-07-31 Thread Dave Airlie
On 1 August 2017 at 14:14, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This looks like it's supported since llvm 3.9 at least,
> so switch over radeonsi and radv to using it, -pro also
> uses this. We can now drop creating lds for these operations
> as the ds_swizzle operation doesn't actually write to lds at all.

This also fixes a bunch of multisample interpolation tests on
radv on CIK.

Not 100% sure why.
Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.

2017-07-31 Thread Dave Airlie
From: Dave Airlie 

This looks like it's supported since llvm 3.9 at least,
so switch over radeonsi and radv to using it, -pro also
uses this. We can now drop creating lds for these operations
as the ds_swizzle operation doesn't actually write to lds at all.

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_llvm_build.c   | 57 +++-
 src/amd/common/ac_llvm_build.h   |  1 -
 src/amd/common/ac_nir_to_llvm.c  |  9 +
 src/gallium/drivers/radeonsi/si_shader.c | 16 +
 4 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 9b939c1..a38aad6 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -796,21 +796,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
  bool has_ds_bpermute,
  uint32_t mask,
  int idx,
- LLVMValueRef lds,
  LLVMValueRef val)
 {
-   LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
+   LLVMValueRef tl, trbl, args[2];
LLVMValueRef result;
 
-   thread_id = ac_get_thread_id(ctx);
+   if (has_ds_bpermute) {
+   LLVMValueRef thread_id, tl_tid, trbl_tid;
+   thread_id = ac_get_thread_id(ctx);
 
-   tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
- LLVMConstInt(ctx->i32, mask, false), "");
+   tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
+ LLVMConstInt(ctx->i32, mask, false), "");
 
-   trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
-   LLVMConstInt(ctx->i32, idx, false), "");
+   trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
+   LLVMConstInt(ctx->i32, idx, false), "");
 
-   if (has_ds_bpermute) {
args[0] = LLVMBuildMul(ctx->builder, tl_tid,
   LLVMConstInt(ctx->i32, 4, false), "");
args[1] = val;
@@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
  AC_FUNC_ATTR_READNONE |
  AC_FUNC_ATTR_CONVERGENT);
} else {
-   LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+   uint32_t masks[2];
+
+   switch (mask) {
+   case AC_TID_MASK_TOP_LEFT:
+   masks[0] = 0x8000;
+   if (idx == 1)
+   masks[1] = 0x8055;
+   else
+   masks[1] = 0x80aa;
+
+   break;
+   case AC_TID_MASK_TOP:
+   masks[0] = 0x8044;
+   masks[1] = 0x80ee;
+   break;
+   case AC_TID_MASK_LEFT:
+   masks[0] = 0x80a0;
+   masks[1] = 0x80f5;
+   break;
+   }
 
-   store_ptr = ac_build_gep0(ctx, lds, thread_id);
-   load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
-   load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
+   args[0] = val;
+   args[1] = LLVMConstInt(ctx->i32, masks[0], false);
 
-   LLVMBuildStore(ctx->builder, val, store_ptr);
-   tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
-   trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
+   tl = ac_build_intrinsic(ctx,
+   "llvm.amdgcn.ds.swizzle", ctx->i32,
+   args, 2,
+   AC_FUNC_ATTR_READNONE |
+   AC_FUNC_ATTR_CONVERGENT);
+
+   args[1] = LLVMConstInt(ctx->i32, masks[1], false);
+   trbl = ac_build_intrinsic(ctx,
+   "llvm.amdgcn.ds.swizzle", ctx->i32,
+   args, 2,
+   AC_FUNC_ATTR_READNONE |
+   AC_FUNC_ATTR_CONVERGENT);
}
 
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 09fd585..ee27d3c 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
  bool has_ds_bpermute,
  uint32_t mask,
  int idx,
- LLVMValueRef lds,
  LLVMValueRef val);
 
 #define AC_SENDMSG_GS 2
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 530b581..dc765fe 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -68,8 +68,6 @@ struct ac_nir_context {
int num_locals;
LLVMValueRef *locals;
 
-