From: Nicolai Hähnle <nicolai.haeh...@amd.com>

In the alloca'd array case, no longer create redundant and unused allocas
for the individual elements; create getelementptrs instead.

Reviewed-by: Tom Stellard <thomas.stell...@amd.com>
---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c        | 27 ++++++++++++++--------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index d75311e..41f24d3 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -408,81 +408,90 @@ static LLVMValueRef si_build_alloca_undef(struct gallivm_state *gallivm,
        LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
        LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
        return ptr;
 }
 
 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
                             const struct tgsi_full_declaration *decl)
 {
        struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
        LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-       unsigned first, last, i, idx;
+       unsigned first, last, i;
        switch(decl->Declaration.File) {
        case TGSI_FILE_ADDRESS:
        {
                 unsigned idx;
                for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
                        unsigned chan;
                        for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                                 ctx->soa.addr[idx][chan] = 
si_build_alloca_undef(
                                        &ctx->gallivm,
                                        ctx->soa.bld_base.uint_bld.elem_type, 
"");
                        }
                }
                break;
        }
 
        case TGSI_FILE_TEMPORARY:
        {
+               LLVMValueRef array_alloca = NULL;
                unsigned decl_size;
                first = decl->Range.First;
                last = decl->Range.Last;
                decl_size = 4 * ((last - first) + 1);
                if (decl->Declaration.Array) {
                        unsigned id = decl->Array.ArrayID - 1;
                        if (!ctx->arrays) {
                                int size = 
bld_base->info->array_max[TGSI_FILE_TEMPORARY];
                                ctx->arrays = CALLOC(size, 
sizeof(ctx->arrays[0]));
-                       for (i = 0; i < size; ++i) {
-                               assert(!ctx->arrays[i].alloca);}
                        }
 
                        ctx->arrays[id].range = decl->Range;
 
                        /* If the array is more than 16 elements (each element
                         * is 32-bits), then store it in a vector.  Storing the
                         * array in a vector will causes the compiler to store
                         * the array in registers and access it using indirect
                         * addressing.  16 is number of vector elements that
                         * LLVM will store in a register.
                         * FIXME: We shouldn't need to do this.  LLVM should be
                         * smart enough to promote allocas int registers when
                         * profitable.
                         */
                        if (decl_size > 16) {
-                               ctx->arrays[id].alloca = 
LLVMBuildAlloca(builder,
+                               array_alloca = LLVMBuildAlloca(builder,
                                        LLVMArrayType(bld_base->base.vec_type, 
decl_size),"array");
+                               ctx->arrays[id].alloca = array_alloca;
                        }
                }
-               first = decl->Range.First;
-               last = decl->Range.Last;
+
                if (!ctx->temps_count) {
                        ctx->temps_count = 
bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
                        ctx->temps = MALLOC(TGSI_NUM_CHANNELS * 
ctx->temps_count * sizeof(LLVMValueRef));
                }
-               for (idx = first; idx <= last; idx++) {
-                       for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
-                               ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
+               if (!array_alloca) {
+                       for (i = 0; i < decl_size; ++i) {
+                               ctx->temps[first * TGSI_NUM_CHANNELS + i] =
                                        
si_build_alloca_undef(bld_base->base.gallivm,
                                                              
bld_base->base.vec_type,
                                                              "temp");
                        }
+               } else {
+                       LLVMValueRef idxs[2] = {
+                               bld_base->uint_bld.zero,
+                               NULL
+                       };
+                       for (i = 0; i < decl_size; ++i) {
+                               idxs[1] = 
lp_build_const_int32(bld_base->base.gallivm, i);
+                               ctx->temps[first * TGSI_NUM_CHANNELS + i] =
+                                       LLVMBuildGEP(builder, array_alloca, 
idxs, 2, "temp");
+                       }
                }
                break;
        }
        case TGSI_FILE_INPUT:
        {
                unsigned idx;
                for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
                        if (ctx->load_input)
                                ctx->load_input(ctx, idx, decl);
                }
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to