Re: [Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.
On Thu, Aug 17, 2017 at 2:46 PM, Andres Gomez wrote: > Hi Dave, > > this patch has been collected for 17.2.0-rc3 but, for 17.1.x, the > conflicts are not so trivial and the landed commit was tagged for > stable without specifying the actual branch. > > Therefore, unless you intended this also for 17.1.x and would like to > provide a backport, I'm more keen on dropping it for that branch. > > WDYT? Let's drop it. It's not really stable material. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.
Hi Dave, this patch has been collected for 17.2.0-rc3 but, for 17.1.x, the conflicts are not so trivial and the landed commit was tagged for stable without specifying the actual branch. Therefore, unless you intended this also for 17.1.x and would like to provide a backport, I'm more keen on dropping it for that branch. WDYT? Br. On Tue, 2017-08-01 at 14:14 +1000, Dave Airlie wrote: > From: Dave Airlie> > This looks like it's supported since llvm 3.9 at least, > so switch over radeonsi and radv to using it, -pro also > uses this. We can now drop creating lds for these operations > as the ds_swizzle operation doesn't actually write to lds at all. > > Signed-off-by: Dave Airlie > --- > src/amd/common/ac_llvm_build.c | 57 > +++- > src/amd/common/ac_llvm_build.h | 1 - > src/amd/common/ac_nir_to_llvm.c | 9 + > src/gallium/drivers/radeonsi/si_shader.c | 16 + > 4 files changed, 44 insertions(+), 39 deletions(-) > > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c > index 9b939c1..a38aad6 100644 > --- a/src/amd/common/ac_llvm_build.c > +++ b/src/amd/common/ac_llvm_build.c > @@ -796,21 +796,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx, > bool has_ds_bpermute, > uint32_t mask, > int idx, > - LLVMValueRef lds, > LLVMValueRef val) > { > - LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2]; > + LLVMValueRef tl, trbl, args[2]; > LLVMValueRef result; > > - thread_id = ac_get_thread_id(ctx); > + if (has_ds_bpermute) { > + LLVMValueRef thread_id, tl_tid, trbl_tid; > + thread_id = ac_get_thread_id(ctx); > > - tl_tid = LLVMBuildAnd(ctx->builder, thread_id, > - LLVMConstInt(ctx->i32, mask, false), ""); > + tl_tid = LLVMBuildAnd(ctx->builder, thread_id, > + LLVMConstInt(ctx->i32, mask, false), ""); > > - trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, > - LLVMConstInt(ctx->i32, idx, false), ""); > + trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, > + LLVMConstInt(ctx->i32, idx, false), ""); > > - if (has_ds_bpermute) { > args[0] = 
LLVMBuildMul(ctx->builder, tl_tid, > LLVMConstInt(ctx->i32, 4, false), ""); > args[1] = val; > @@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx, > AC_FUNC_ATTR_READNONE | > AC_FUNC_ATTR_CONVERGENT); > } else { > - LLVMValueRef store_ptr, load_ptr0, load_ptr1; > + uint32_t masks[2]; > + > + switch (mask) { > + case AC_TID_MASK_TOP_LEFT: > + masks[0] = 0x8000; > + if (idx == 1) > + masks[1] = 0x8055; > + else > + masks[1] = 0x80aa; > + > + break; > + case AC_TID_MASK_TOP: > + masks[0] = 0x8044; > + masks[1] = 0x80ee; > + break; > + case AC_TID_MASK_LEFT: > + masks[0] = 0x80a0; > + masks[1] = 0x80f5; > + break; > + } > > - store_ptr = ac_build_gep0(ctx, lds, thread_id); > - load_ptr0 = ac_build_gep0(ctx, lds, tl_tid); > - load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid); > + args[0] = val; > + args[1] = LLVMConstInt(ctx->i32, masks[0], false); > > - LLVMBuildStore(ctx->builder, val, store_ptr); > - tl = LLVMBuildLoad(ctx->builder, load_ptr0, ""); > - trbl = LLVMBuildLoad(ctx->builder, load_ptr1, ""); > + tl = ac_build_intrinsic(ctx, > + "llvm.amdgcn.ds.swizzle", ctx->i32, > + args, 2, > + AC_FUNC_ATTR_READNONE | > + AC_FUNC_ATTR_CONVERGENT); > + > + args[1] = LLVMConstInt(ctx->i32, masks[1], false); > + trbl = ac_build_intrinsic(ctx, > + "llvm.amdgcn.ds.swizzle", ctx->i32, > + args, 2, > + AC_FUNC_ATTR_READNONE | > + AC_FUNC_ATTR_CONVERGENT); > } > > tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); > diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h > index 09fd585..ee27d3c 100644 > --- a/src/amd/common/ac_llvm_build.h > +++ b/src/amd/common/ac_llvm_build.h > @@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context
Re: [Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.
Tested on SI. Acked-by: Marek OlšákMarek On Tue, Aug 1, 2017 at 6:14 AM, Dave Airlie wrote: > From: Dave Airlie > > This looks like it's supported since llvm 3.9 at least, > so switch over radeonsi and radv to using it, -pro also > uses this. We can now drop creating lds for these operations > as the ds_swizzle operation doesn't actually write to lds at all. > > Signed-off-by: Dave Airlie > --- > src/amd/common/ac_llvm_build.c | 57 > +++- > src/amd/common/ac_llvm_build.h | 1 - > src/amd/common/ac_nir_to_llvm.c | 9 + > src/gallium/drivers/radeonsi/si_shader.c | 16 + > 4 files changed, 44 insertions(+), 39 deletions(-) > > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c > index 9b939c1..a38aad6 100644 > --- a/src/amd/common/ac_llvm_build.c > +++ b/src/amd/common/ac_llvm_build.c > @@ -796,21 +796,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx, > bool has_ds_bpermute, > uint32_t mask, > int idx, > - LLVMValueRef lds, > LLVMValueRef val) > { > - LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2]; > + LLVMValueRef tl, trbl, args[2]; > LLVMValueRef result; > > - thread_id = ac_get_thread_id(ctx); > + if (has_ds_bpermute) { > + LLVMValueRef thread_id, tl_tid, trbl_tid; > + thread_id = ac_get_thread_id(ctx); > > - tl_tid = LLVMBuildAnd(ctx->builder, thread_id, > - LLVMConstInt(ctx->i32, mask, false), ""); > + tl_tid = LLVMBuildAnd(ctx->builder, thread_id, > + LLVMConstInt(ctx->i32, mask, false), > ""); > > - trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, > - LLVMConstInt(ctx->i32, idx, false), ""); > + trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, > + LLVMConstInt(ctx->i32, idx, false), > ""); > > - if (has_ds_bpermute) { > args[0] = LLVMBuildMul(ctx->builder, tl_tid, >LLVMConstInt(ctx->i32, 4, false), ""); > args[1] = val; > @@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx, > AC_FUNC_ATTR_READNONE | > AC_FUNC_ATTR_CONVERGENT); > } else { > - LLVMValueRef store_ptr, load_ptr0, load_ptr1; > + uint32_t masks[2]; > + > + 
switch (mask) { > + case AC_TID_MASK_TOP_LEFT: > + masks[0] = 0x8000; > + if (idx == 1) > + masks[1] = 0x8055; > + else > + masks[1] = 0x80aa; > + > + break; > + case AC_TID_MASK_TOP: > + masks[0] = 0x8044; > + masks[1] = 0x80ee; > + break; > + case AC_TID_MASK_LEFT: > + masks[0] = 0x80a0; > + masks[1] = 0x80f5; > + break; > + } > > - store_ptr = ac_build_gep0(ctx, lds, thread_id); > - load_ptr0 = ac_build_gep0(ctx, lds, tl_tid); > - load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid); > + args[0] = val; > + args[1] = LLVMConstInt(ctx->i32, masks[0], false); > > - LLVMBuildStore(ctx->builder, val, store_ptr); > - tl = LLVMBuildLoad(ctx->builder, load_ptr0, ""); > - trbl = LLVMBuildLoad(ctx->builder, load_ptr1, ""); > + tl = ac_build_intrinsic(ctx, > + "llvm.amdgcn.ds.swizzle", ctx->i32, > + args, 2, > + AC_FUNC_ATTR_READNONE | > + AC_FUNC_ATTR_CONVERGENT); > + > + args[1] = LLVMConstInt(ctx->i32, masks[1], false); > + trbl = ac_build_intrinsic(ctx, > + "llvm.amdgcn.ds.swizzle", ctx->i32, > + args, 2, > + AC_FUNC_ATTR_READNONE | > + AC_FUNC_ATTR_CONVERGENT); > } > > tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); > diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h > index 09fd585..ee27d3c 100644 > --- a/src/amd/common/ac_llvm_build.h > +++ b/src/amd/common/ac_llvm_build.h > @@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx, > bool has_ds_bpermute, > uint32_t mask, > int idx, > - LLVMValueRef lds,
Re: [Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.
On 1 August 2017 at 14:14, Dave Airlie wrote: > From: Dave Airlie > > This looks like it's supported since llvm 3.9 at least, > so switch over radeonsi and radv to using it, -pro also > uses this. We can now drop creating lds for these operations > as the ds_swizzle operation doesn't actually write to lds at all. This also fixes a bunch of multisample interpolation tests on radv on CIK. Not 100% sure why. Dave. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.
From: Dave AirlieThis looks like it's supported since llvm 3.9 at least, so switch over radeonsi and radv to using it, -pro also uses this. We can now drop creating lds for these operations as the ds_swizzle operation doesn't actually write to lds at all. Signed-off-by: Dave Airlie --- src/amd/common/ac_llvm_build.c | 57 +++- src/amd/common/ac_llvm_build.h | 1 - src/amd/common/ac_nir_to_llvm.c | 9 + src/gallium/drivers/radeonsi/si_shader.c | 16 + 4 files changed, 44 insertions(+), 39 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 9b939c1..a38aad6 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -796,21 +796,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx, bool has_ds_bpermute, uint32_t mask, int idx, - LLVMValueRef lds, LLVMValueRef val) { - LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2]; + LLVMValueRef tl, trbl, args[2]; LLVMValueRef result; - thread_id = ac_get_thread_id(ctx); + if (has_ds_bpermute) { + LLVMValueRef thread_id, tl_tid, trbl_tid; + thread_id = ac_get_thread_id(ctx); - tl_tid = LLVMBuildAnd(ctx->builder, thread_id, - LLVMConstInt(ctx->i32, mask, false), ""); + tl_tid = LLVMBuildAnd(ctx->builder, thread_id, + LLVMConstInt(ctx->i32, mask, false), ""); - trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, - LLVMConstInt(ctx->i32, idx, false), ""); + trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, + LLVMConstInt(ctx->i32, idx, false), ""); - if (has_ds_bpermute) { args[0] = LLVMBuildMul(ctx->builder, tl_tid, LLVMConstInt(ctx->i32, 4, false), ""); args[1] = val; @@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); } else { - LLVMValueRef store_ptr, load_ptr0, load_ptr1; + uint32_t masks[2]; + + switch (mask) { + case AC_TID_MASK_TOP_LEFT: + masks[0] = 0x8000; + if (idx == 1) + masks[1] = 0x8055; + else + masks[1] = 0x80aa; + + break; + case AC_TID_MASK_TOP: + masks[0] = 0x8044; + masks[1] = 0x80ee; + 
break; + case AC_TID_MASK_LEFT: + masks[0] = 0x80a0; + masks[1] = 0x80f5; + break; + } - store_ptr = ac_build_gep0(ctx, lds, thread_id); - load_ptr0 = ac_build_gep0(ctx, lds, tl_tid); - load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid); + args[0] = val; + args[1] = LLVMConstInt(ctx->i32, masks[0], false); - LLVMBuildStore(ctx->builder, val, store_ptr); - tl = LLVMBuildLoad(ctx->builder, load_ptr0, ""); - trbl = LLVMBuildLoad(ctx->builder, load_ptr1, ""); + tl = ac_build_intrinsic(ctx, + "llvm.amdgcn.ds.swizzle", ctx->i32, + args, 2, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_CONVERGENT); + + args[1] = LLVMConstInt(ctx->i32, masks[1], false); + trbl = ac_build_intrinsic(ctx, + "llvm.amdgcn.ds.swizzle", ctx->i32, + args, 2, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_CONVERGENT); } tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 09fd585..ee27d3c 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx, bool has_ds_bpermute, uint32_t mask, int idx, - LLVMValueRef lds, LLVMValueRef val); #define AC_SENDMSG_GS 2 diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 530b581..dc765fe 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -68,8 +68,6 @@ struct ac_nir_context { int num_locals; LLVMValueRef *locals; -