From: Nicolai Hähnle <nicolai.haeh...@amd.com>

In the alloca'd array case, no longer create redundant and unused allocas for the individual elements; create getelementptrs (GEPs) into the array alloca instead.
Reviewed-by: Tom Stellard <thomas.stell...@amd.com> --- .../drivers/radeon/radeon_setup_tgsi_llvm.c | 27 ++++++++++++++-------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index d75311e..41f24d3 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -408,81 +408,90 @@ static LLVMValueRef si_build_alloca_undef(struct gallivm_state *gallivm, LLVMValueRef ptr = lp_build_alloca(gallivm, type, name); LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr); return ptr; } static void emit_declaration(struct lp_build_tgsi_context *bld_base, const struct tgsi_full_declaration *decl) { struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); LLVMBuilderRef builder = bld_base->base.gallivm->builder; - unsigned first, last, i, idx; + unsigned first, last, i; switch(decl->Declaration.File) { case TGSI_FILE_ADDRESS: { unsigned idx; for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { ctx->soa.addr[idx][chan] = si_build_alloca_undef( &ctx->gallivm, ctx->soa.bld_base.uint_bld.elem_type, ""); } } break; } case TGSI_FILE_TEMPORARY: { + LLVMValueRef array_alloca = NULL; unsigned decl_size; first = decl->Range.First; last = decl->Range.Last; decl_size = 4 * ((last - first) + 1); if (decl->Declaration.Array) { unsigned id = decl->Array.ArrayID - 1; if (!ctx->arrays) { int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY]; ctx->arrays = CALLOC(size, sizeof(ctx->arrays[0])); - for (i = 0; i < size; ++i) { - assert(!ctx->arrays[i].alloca);} } ctx->arrays[id].range = decl->Range; /* If the array is more than 16 elements (each element * is 32-bits), then store it in a vector. Storing the * array in a vector will causes the compiler to store * the array in registers and access it using indirect * addressing. 
16 is number of vector elements that * LLVM will store in a register. * FIXME: We shouldn't need to do this. LLVM should be * smart enough to promote allocas int registers when * profitable. */ if (decl_size > 16) { - ctx->arrays[id].alloca = LLVMBuildAlloca(builder, + array_alloca = LLVMBuildAlloca(builder, LLVMArrayType(bld_base->base.vec_type, decl_size),"array"); + ctx->arrays[id].alloca = array_alloca; } } - first = decl->Range.First; - last = decl->Range.Last; + if (!ctx->temps_count) { ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1; ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef)); } - for (idx = first; idx <= last; idx++) { - for (i = 0; i < TGSI_NUM_CHANNELS; i++) { - ctx->temps[idx * TGSI_NUM_CHANNELS + i] = + if (!array_alloca) { + for (i = 0; i < decl_size; ++i) { + ctx->temps[first * TGSI_NUM_CHANNELS + i] = si_build_alloca_undef(bld_base->base.gallivm, bld_base->base.vec_type, "temp"); } + } else { + LLVMValueRef idxs[2] = { + bld_base->uint_bld.zero, + NULL + }; + for (i = 0; i < decl_size; ++i) { + idxs[1] = lp_build_const_int32(bld_base->base.gallivm, i); + ctx->temps[first * TGSI_NUM_CHANNELS + i] = + LLVMBuildGEP(builder, array_alloca, idxs, 2, "temp"); + } } break; } case TGSI_FILE_INPUT: { unsigned idx; for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { if (ctx->load_input) ctx->load_input(ctx, idx, decl); } -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev