This is so that we can split different types of loads more easily.

Signed-off-by: Rhys Perry <pendingchao...@gmail.com>
---
 src/amd/common/ac_llvm_build.c  |  8 ++--
 src/amd/common/ac_nir_to_llvm.c | 80 ++++++++++++++++-----------------
 src/compiler/nir/nir.h          |  2 +-
 3 files changed, 44 insertions(+), 46 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index abc18da13d..154cc696a2 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2943,9 +2943,11 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, 
LLVMValueRef value,
        if (count == num_components)
                return value;
 
-       LLVMValueRef masks[] = {
-           ctx->i32_0, ctx->i32_1,
-           LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
+       LLVMValueRef masks[MAX2(count, 2)];
+       masks[0] = ctx->i32_0;
+       masks[1] = ctx->i32_1;
+       for (unsigned i = 2; i < count; i++)
+               masks[i] = LLVMConstInt(ctx->i32, i, false);
 
        if (count == 1)
                return LLVMBuildExtractElement(ctx->builder, value, masks[0],
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index a109f5a815..c05b45e084 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1623,37 +1623,43 @@ static LLVMValueRef visit_atomic_ssbo(struct 
ac_nir_context *ctx,
 static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
                                       const nir_intrinsic_instr *instr)
 {
-       LLVMValueRef results[2];
-       int load_bytes;
        int elem_size_bytes = instr->dest.ssa.bit_size / 8;
        int num_components = instr->num_components;
-       int num_bytes = num_components * elem_size_bytes;
        enum gl_access_qualifier access = nir_intrinsic_access(instr);
        LLVMValueRef glc = ctx->ac.i1false;
 
        if (access & (ACCESS_VOLATILE | ACCESS_COHERENT))
                glc = ctx->ac.i1true;
 
-       for (int i = 0; i < num_bytes; i += load_bytes) {
-               load_bytes = MIN2(num_bytes - i, 16);
-               const char *load_name;
-               LLVMTypeRef data_type;
-               LLVMValueRef offset = get_src(ctx, instr->src[1]);
-               LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i, false);
-               LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
-                                                       get_src(ctx, 
instr->src[0]), false);
-               LLVMValueRef vindex = ctx->ac.i32_0;
-
-               int idx = i ? 1 : 0;
+       LLVMValueRef offset = get_src(ctx, instr->src[1]);
+       LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
+                                               get_src(ctx, instr->src[0]), 
false);
+       LLVMValueRef vindex = ctx->ac.i32_0;
+
+       LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
+       LLVMTypeRef def_elem_type = num_components > 1 ? 
LLVMGetElementType(def_type) : def_type;
+
+       LLVMValueRef results[4];
+       for (int i = 0; i < num_components;) {
+               int num_elems = num_components - i;
+               if (num_elems * elem_size_bytes > 16)
+                       num_elems = 16 / elem_size_bytes;
+               int load_bytes = num_elems * elem_size_bytes;
+
+               LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * 
elem_size_bytes, false);
+
+               LLVMValueRef ret;
                if (load_bytes == 2) {
-                       results[idx] = ac_build_tbuffer_load_short(&ctx->ac,
-                                                                  rsrc,
-                                                                  vindex,
-                                                                  offset,
-                                                                  
ctx->ac.i32_0,
-                                                                  immoffset,
-                                                                  glc);
+                       ret = ac_build_tbuffer_load_short(&ctx->ac,
+                                                         rsrc,
+                                                         vindex,
+                                                         offset,
+                                                         ctx->ac.i32_0,
+                                                         immoffset,
+                                                         glc);
                } else {
+                       const char *load_name;
+                       LLVMTypeRef data_type;
                        switch (load_bytes) {
                        case 16:
                        case 12:
@@ -1679,33 +1685,23 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
                                glc,
                                ctx->ac.i1false,
                        };
-                       results[idx] = ac_build_intrinsic(&ctx->ac, load_name, 
data_type, params, 5, 0);
-                       unsigned num_elems = ac_get_type_size(data_type) / 
elem_size_bytes;
-                       LLVMTypeRef resTy = 
LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), 
num_elems);
-                       results[idx] = LLVMBuildBitCast(ctx->ac.builder, 
results[idx], resTy, "");
+                       ret = ac_build_intrinsic(&ctx->ac, load_name, 
data_type, params, 5, 0);
                }
-       }
 
-       assume(results[0]);
-       LLVMValueRef ret = results[0];
-       if (num_bytes > 16 || num_components == 3) {
-               LLVMValueRef masks[] = {
-                       LLVMConstInt(ctx->ac.i32, 0, false), 
LLVMConstInt(ctx->ac.i32, 1, false),
-                       LLVMConstInt(ctx->ac.i32, 2, false), 
LLVMConstInt(ctx->ac.i32, 3, false),
-               };
+               LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, 
ac_get_type_size(LLVMTypeOf(ret)));
+               ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
+               ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
 
-               if (num_bytes > 16 && num_components == 3) {
-                       /* we end up with a v2i64 and i64 but shuffle fails on 
that */
-                       results[1] = ac_build_expand(&ctx->ac, results[1], 1, 
2);
-               }
+               LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
+               ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
 
-               LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
-               ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
-                                            results[num_bytes > 16 ? 1 : 0], 
swizzle, "");
+               for (unsigned j = 0; j < num_elems; j++) {
+                       results[i + j] = 
LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, 
false), "");
+               }
+               i += num_elems;
        }
 
-       return LLVMBuildBitCast(ctx->ac.builder, ret,
-                               get_def_type(ctx, &instr->dest.ssa), "");
+       return ac_build_gather_values(&ctx->ac, results, num_components);
 }
 
 static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index db935c8496..ba85fc0547 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1376,7 +1376,7 @@ nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
  * to satisfy X % align == 0.
  */
 static inline unsigned
-nir_intrinsic_align(nir_intrinsic_instr *intrin)
+nir_intrinsic_align(const nir_intrinsic_instr *intrin)
 {
    const unsigned align_mul = nir_intrinsic_align_mul(intrin);
    const unsigned align_offset = nir_intrinsic_align_offset(intrin);
-- 
2.19.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to