From: Marek Olšák <[email protected]>

---
 src/gallium/drivers/radeonsi/si_shader.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 67ab16b..414810e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5007,62 +5007,66 @@ static void si_llvm_emit_ddxy(
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        unsigned opcode = inst->Instruction.Opcode;
        LLVMValueRef store_ptr, load_ptr0, load_ptr1, thread_id;
        LLVMValueRef tl, trbl, result[4];
        LLVMValueRef tl_tid, trbl_tid;
        unsigned swizzle[4];
        unsigned c;
        int idx;
        unsigned mask;
+       bool has_ds_bpermute = HAVE_LLVM >= 0x0309 &&
+                              ctx->screen->b.chip_class >= VI;
 
        thread_id = get_thread_id(ctx);;
-       store_ptr = build_gep0(ctx, ctx->lds, thread_id);
 
        if (opcode == TGSI_OPCODE_DDX_FINE)
                mask = TID_MASK_LEFT;
        else if (opcode == TGSI_OPCODE_DDY_FINE)
                mask = TID_MASK_TOP;
        else
                mask = TID_MASK_TOP_LEFT;
 
        tl_tid = LLVMBuildAnd(gallivm->builder, thread_id,
                                lp_build_const_int32(gallivm, mask), "");
-       load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
 
        /* for DDX we want to next X pixel, DDY next Y pixel. */
        idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
        trbl_tid = LLVMBuildAdd(gallivm->builder, tl_tid,
                                  lp_build_const_int32(gallivm, idx), "");
-       load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
+
+       if (!has_ds_bpermute) {
+               store_ptr = build_gep0(ctx, ctx->lds, thread_id);
+               load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
+               load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
+       }
 
        for (c = 0; c < 4; ++c) {
                unsigned i;
                LLVMValueRef val;
                LLVMValueRef args[2];
 
                swizzle[c] = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c);
                for (i = 0; i < c; ++i) {
                        if (swizzle[i] == swizzle[c]) {
                                result[c] = result[i];
                                break;
                        }
                }
                if (i != c)
                        continue;
 
                val = LLVMBuildBitCast(gallivm->builder,
                                lp_build_emit_fetch(bld_base, inst, 0, c),
                                                ctx->i32, "");
 
-               if ((HAVE_LLVM >= 0x0309) && ctx->screen->b.family >= CHIP_TONGA) {
-
+               if (has_ds_bpermute) {
                        args[0] = LLVMBuildMul(gallivm->builder, tl_tid,
                                         lp_build_const_int32(gallivm, 4), "");
                        args[1] = val;
                        tl = lp_build_intrinsic(gallivm->builder,
                                        "llvm.amdgcn.ds.bpermute", ctx->i32,
                                        args, 2, LLVMReadNoneAttribute);
 
                        args[0] = LLVMBuildMul(gallivm->builder, trbl_tid,
                                         lp_build_const_int32(gallivm, 4), "");
                        trbl = lp_build_intrinsic(gallivm->builder,
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to