v2: do not force-enable IDXEN when unnecessary

Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
 src/amd/common/ac_llvm_build.c  | 111 ++++++++++++++++++++++++++++++++
 src/amd/common/ac_llvm_build.h  |  26 ++++++++
 src/amd/common/ac_nir_to_llvm.c |  26 ++------
 3 files changed, 142 insertions(+), 21 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 8d5682f6f7a..06dc1383121 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1554,6 +1554,117 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
                                  ac_get_load_intr_attribs(can_speculate));
 }
 
+static void /* LLVM 8+ helper: emits llvm.amdgcn.{raw,struct}.tbuffer.store.<type>. */
+ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx,
+                            LLVMValueRef rsrc,
+                            LLVMValueRef vdata,
+                            LLVMValueRef vindex, /* NULL allowed; only used when structurized */
+                            LLVMValueRef voffset, /* NULL allowed; falls back to ctx->i32_0 */
+                            LLVMValueRef soffset, /* NULL allowed; falls back to ctx->i32_0 */
+                            unsigned num_channels,
+                            unsigned dfmt,
+                            unsigned nfmt,
+                            bool glc,
+                            bool slc,
+                            bool writeonly_memory,
+                            bool structurized) /* true selects the "struct" variant, false "raw" */
+{
+       LLVMValueRef args[7]; /* at most 7 operands: the struct variant adds vindex */
+       int idx = 0;
+       args[idx++] = vdata;
+       args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+       if (structurized)
+               args[idx++] = vindex ? vindex : ctx->i32_0;
+       args[idx++] = voffset ? voffset : ctx->i32_0;
+       args[idx++] = soffset ? soffset : ctx->i32_0;
+       args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); /* one format word: dfmt OR'ed with nfmt shifted left by 4 */
+       args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); /* bit 0 = glc, bit 1 = slc */
+       unsigned func = CLAMP(num_channels, 1, 3) - 1; /* index into type_names */
+       /* num_channels <= 1 -> i32, 2 -> v2i32, >= 3 -> v4i32. */
+       const char *type_names[] = {"i32", "v2i32", "v4i32"};
+       const char *indexing_kind = structurized ? "struct" : "raw";
+       char name[256];
+       /* Compose the intrinsic name from the indexing kind and the data type. */
+       snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s",
+                indexing_kind, type_names[func]);
+       /* The call has a void result; its attributes depend on writeonly_memory. */
+       ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
+                          ac_get_store_intr_attribs(writeonly_memory));
+}
+
+void /* Emit a typed buffer store via the intrinsic matching the LLVM version. */
+ac_build_tbuffer_store(struct ac_llvm_context *ctx,
+                      LLVMValueRef rsrc,
+                      LLVMValueRef vdata,
+                      LLVMValueRef vindex, /* NULL allowed; treated as ctx->i32_0 */
+                      LLVMValueRef voffset, /* NULL allowed; treated as ctx->i32_0 */
+                      LLVMValueRef soffset, /* NULL allowed; treated as ctx->i32_0 */
+                      LLVMValueRef immoffset, /* used as-is; must not be NULL */
+                      unsigned num_channels,
+                      unsigned dfmt,
+                      unsigned nfmt,
+                      bool glc,
+                      bool slc,
+                      bool writeonly_memory)
+{
+       if (HAVE_LLVM >= 0x800) {
+               bool structurized = vindex && vindex != ctx->i32_0;
+               /* The LLVM 8 helper takes no immediate offset, so fold it into voffset. */
+               voffset = LLVMBuildAdd(ctx->builder,
+                                      voffset ? voffset : ctx->i32_0,
+                                      immoffset, "");
+               /* A NULL or ctx->i32_0 vindex selects the raw (non-indexed) variant. */
+               ac_build_llvm8_tbuffer_store(ctx, rsrc, vdata, vindex, voffset,
+                                            soffset, num_channels, dfmt, nfmt,
+                                            glc, slc, writeonly_memory,
+                                            structurized);
+       } else {
+               LLVMValueRef params[] = {
+                       vdata,
+                       rsrc,
+                       vindex ? vindex : ctx->i32_0, /* same NULL handling as the LLVM 8 path */
+                       voffset ? voffset : ctx->i32_0,
+                       soffset ? soffset : ctx->i32_0,
+                       immoffset,
+                       LLVMConstInt(ctx->i32, dfmt, false),
+                       LLVMConstInt(ctx->i32, nfmt, false),
+                       glc ? ctx->i1true : ctx->i1false, /* glc: the legacy intrinsic expects i1 */
+                       slc ? ctx->i1true : ctx->i1false, /* slc: likewise i1, not i32 */
+               };
+               unsigned func = CLAMP(num_channels, 1, 3) - 1;
+               const char *type_names[] = {"i32", "v2i32", "v4i32"};
+               char name[256];
+               /* num_channels <= 1 -> i32, 2 -> v2i32, >= 3 -> v4i32. */
+               snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
+                        type_names[func]);
+
+               ac_build_intrinsic(ctx, name, ctx->voidt, params, 10,
+                                  ac_get_store_intr_attribs(writeonly_memory));
+       }
+}
+
+void /* Store one 16-bit value by delegating to ac_build_tbuffer_store. */
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+                            LLVMValueRef rsrc,
+                            LLVMValueRef vdata,
+                            LLVMValueRef vindex,
+                            LLVMValueRef voffset,
+                            LLVMValueRef soffset,
+                            bool glc,
+                            bool slc,
+                            bool writeonly_memory)
+{
+       unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
+       unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+       /* Reinterpret the value as i16, then zero-extend it to the i32 data operand. */
+       vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
+       vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
+       /* Single channel with a zero immediate offset. */
+       ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset,
+                              ctx->i32_0, 1, dfmt, nfmt, glc, slc,
+                              writeonly_memory);
+}
+
 /**
  * Set range metadata on an instruction.  This can only be used on load and
  * call instructions.  If you know an instruction can only produce the values
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 069ba7aa3c9..5ca93e66982 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -331,6 +331,32 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
                            bool can_speculate,
                            bool structurized);
 
+void
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+                            LLVMValueRef rsrc,
+                            LLVMValueRef vdata,
+                            LLVMValueRef vindex,
+                            LLVMValueRef voffset,
+                            LLVMValueRef soffset,
+                            bool glc,
+                            bool slc,
+                            bool writeonly_memory);
+
+void
+ac_build_tbuffer_store(struct ac_llvm_context *ctx,
+                      LLVMValueRef rsrc,
+                      LLVMValueRef vdata,
+                      LLVMValueRef vindex,
+                      LLVMValueRef voffset,
+                      LLVMValueRef soffset,
+                      LLVMValueRef immoffset,
+                      unsigned num_channels,
+                      unsigned dfmt,
+                      unsigned nfmt,
+                      bool glc,
+                      bool slc,
+                      bool writeonly_memory);
+
 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);
 
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index f4d408cd587..f2070eb9a8e 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1521,14 +1521,12 @@ static unsigned get_cache_policy(struct ac_nir_context 
*ctx,
 static void visit_store_ssbo(struct ac_nir_context *ctx,
                              nir_intrinsic_instr *instr)
 {
-       const char *store_name;
        LLVMValueRef src_data = get_src(ctx, instr->src[0]);
        int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) 
/ 8;
        unsigned writemask = nir_intrinsic_write_mask(instr);
        enum gl_access_qualifier access = nir_intrinsic_access(instr);
        bool writeonly_memory = access & ACCESS_NON_READABLE;
        unsigned cache_policy = get_cache_policy(ctx, access, false, 
writeonly_memory);
-       LLVMValueRef glc = (cache_policy & ac_glc) ? ctx->ac.i1true : 
ctx->ac.i1false;
 
        LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
                                        get_src(ctx, instr->src[1]), true);
@@ -1571,25 +1569,11 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
                                      LLVMConstInt(ctx->ac.i32, start * 
elem_size_bytes, false), "");
 
                if (num_bytes == 2) {
-                       store_name = "llvm.amdgcn.tbuffer.store.i32";
-                       data_type = ctx->ac.i32;
-                       data = LLVMBuildBitCast(ctx->ac.builder, data, 
ctx->ac.i16, "");
-                       data = LLVMBuildZExt(ctx->ac.builder, data, data_type, 
"");
-                       LLVMValueRef tbuffer_params[] = {
-                               data,
-                               rsrc,
-                               ctx->ac.i32_0, /* vindex */
-                               offset,        /* voffset */
-                               ctx->ac.i32_0,
-                               ctx->ac.i32_0,
-                               LLVMConstInt(ctx->ac.i32, 2, false), // dfmt (= 
16bit)
-                               LLVMConstInt(ctx->ac.i32, 4, false), // nfmt (= 
uint)
-                               glc,
-                               ctx->ac.i1false,
-                       };
-                       ac_build_intrinsic(&ctx->ac, store_name,
-                                          ctx->ac.voidt, tbuffer_params, 10,
-                                          
ac_get_store_intr_attribs(writeonly_memory));
+                       ac_build_tbuffer_store_short(&ctx->ac, rsrc, data,
+                                                    ctx->ac.i32_0, offset,
+                                                    ctx->ac.i32_0,
+                                                    cache_policy & ac_glc,
+                                                    false, writeonly_memory);
                } else {
                        int num_channels = num_bytes / 4;
 
-- 
2.21.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to