On Tue, Aug 09, 2016 at 12:36:37PM +0200, Nicolai Hähnle wrote:
> From: Nicolai Hähnle
>
> In the alloca'd array case, no longer create redundant and unused allocas
> for the individual elements; create getelementptrs instead.
Reviewed-by: Tom Stellard
> ---
> .../drivers/radeon/radeon_setup_tgsi_llvm.c| 27
> ++
> 1 file changed, 18 insertions(+), 9 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index d75311e..41f24d3 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -408,81 +408,90 @@ static LLVMValueRef si_build_alloca_undef(struct
> gallivm_state *gallivm,
> LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
> LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
> return ptr;
> }
>
> static void emit_declaration(struct lp_build_tgsi_context *bld_base,
>const struct tgsi_full_declaration *decl)
> {
> struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
> LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> - unsigned first, last, i, idx;
> + unsigned first, last, i;
> switch(decl->Declaration.File) {
> case TGSI_FILE_ADDRESS:
> {
>unsigned idx;
> for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
> unsigned chan;
> for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
>ctx->soa.addr[idx][chan] =
> si_build_alloca_undef(
> &ctx->gallivm,
> ctx->soa.bld_base.uint_bld.elem_type,
> "");
> }
> }
> break;
> }
>
> case TGSI_FILE_TEMPORARY:
> {
> + LLVMValueRef array_alloca = NULL;
> unsigned decl_size;
> first = decl->Range.First;
> last = decl->Range.Last;
> decl_size = 4 * ((last - first) + 1);
> if (decl->Declaration.Array) {
> unsigned id = decl->Array.ArrayID - 1;
> if (!ctx->arrays) {
> int size =
> bld_base->info->array_max[TGSI_FILE_TEMPORARY];
> ctx->arrays = CALLOC(size,
> sizeof(ctx->arrays[0]));
> - for (i = 0; i < size; ++i) {
> - assert(!ctx->arrays[i].alloca);}
> }
>
> ctx->arrays[id].range = decl->Range;
>
> /* If the array is more than 16 elements (each element
>* is 32-bits), then store it in a vector. Storing the
>* array in a vector will causes the compiler to store
>* the array in registers and access it using indirect
>* addressing. 16 is number of vector elements that
>* LLVM will store in a register.
>* FIXME: We shouldn't need to do this. LLVM should be
>* smart enough to promote allocas int registers when
>* profitable.
>*/
> if (decl_size > 16) {
> - ctx->arrays[id].alloca =
> LLVMBuildAlloca(builder,
> + array_alloca = LLVMBuildAlloca(builder,
> LLVMArrayType(bld_base->base.vec_type,
> decl_size),"array");
> + ctx->arrays[id].alloca = array_alloca;
> }
> }
> - first = decl->Range.First;
> - last = decl->Range.Last;
> +
> if (!ctx->temps_count) {
> ctx->temps_count =
> bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
> ctx->temps = MALLOC(TGSI_NUM_CHANNELS *
> ctx->temps_count * sizeof(LLVMValueRef));
> }
> - for (idx = first; idx <= last; idx++) {
> - for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
> - ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
> + if (!array_alloca) {
> + for (i = 0; i < decl_size; ++i) {
> + ctx->temps[first * TGSI_NUM_CHANNELS + i] =
>
> si_build_alloca_undef(bld_base->base.gallivm,
>
> bld_base->base.vec_type,
> "temp");
> }
> + } else {
> + LLVMValueRef idxs[2] = {
> + bld_base->uint_bld.zero,
> + NULL
> +