On Tue, Aug 09, 2016 at 12:36:37PM +0200, Nicolai Hähnle wrote:
> From: Nicolai Hähnle <nicolai.haeh...@amd.com>
> 
> In the alloca'd array case, no longer create redundant and unused allocas
> for the individual elements; create getelementptrs instead.

Reviewed-by: Tom Stellard <thomas.stell...@amd.com>
> ---
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c        | 27 
> ++++++++++++++--------
>  1 file changed, 18 insertions(+), 9 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index d75311e..41f24d3 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -408,81 +408,90 @@ static LLVMValueRef si_build_alloca_undef(struct 
> gallivm_state *gallivm,
>       LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
>       LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
>       return ptr;
>  }
>  
>  static void emit_declaration(struct lp_build_tgsi_context *bld_base,
>                            const struct tgsi_full_declaration *decl)
>  {
>       struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
>       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> -     unsigned first, last, i, idx;
> +     unsigned first, last, i;
>       switch(decl->Declaration.File) {
>       case TGSI_FILE_ADDRESS:
>       {
>                unsigned idx;
>               for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
>                       unsigned chan;
>                       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
>                                ctx->soa.addr[idx][chan] = 
> si_build_alloca_undef(
>                                       &ctx->gallivm,
>                                       ctx->soa.bld_base.uint_bld.elem_type, 
> "");
>                       }
>               }
>               break;
>       }
>  
>       case TGSI_FILE_TEMPORARY:
>       {
> +             LLVMValueRef array_alloca = NULL;
>               unsigned decl_size;
>               first = decl->Range.First;
>               last = decl->Range.Last;
>               decl_size = 4 * ((last - first) + 1);
>               if (decl->Declaration.Array) {
>                       unsigned id = decl->Array.ArrayID - 1;
>                       if (!ctx->arrays) {
>                               int size = 
> bld_base->info->array_max[TGSI_FILE_TEMPORARY];
>                               ctx->arrays = CALLOC(size, 
> sizeof(ctx->arrays[0]));
> -                     for (i = 0; i < size; ++i) {
> -                             assert(!ctx->arrays[i].alloca);}
>                       }
>  
>                       ctx->arrays[id].range = decl->Range;
>  
>                       /* If the array is more than 16 elements (each element
>                        * is 32-bits), then store it in a vector.  Storing the
>                        * array in a vector will cause the compiler to store
>                        * the array in registers and access it using indirect
>                        * addressing.  16 is number of vector elements that
>                        * LLVM will store in a register.
>                        * FIXME: We shouldn't need to do this.  LLVM should be
>                        * smart enough to promote allocas into registers when
>                        * profitable.
>                        */
>                       if (decl_size > 16) {
> -                             ctx->arrays[id].alloca = 
> LLVMBuildAlloca(builder,
> +                             array_alloca = LLVMBuildAlloca(builder,
>                                       LLVMArrayType(bld_base->base.vec_type, 
> decl_size),"array");
> +                             ctx->arrays[id].alloca = array_alloca;
>                       }
>               }
> -             first = decl->Range.First;
> -             last = decl->Range.Last;
> +
>               if (!ctx->temps_count) {
>                       ctx->temps_count = 
> bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
>                       ctx->temps = MALLOC(TGSI_NUM_CHANNELS * 
> ctx->temps_count * sizeof(LLVMValueRef));
>               }
> -             for (idx = first; idx <= last; idx++) {
> -                     for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
> -                             ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
> +             if (!array_alloca) {
> +                     for (i = 0; i < decl_size; ++i) {
> +                             ctx->temps[first * TGSI_NUM_CHANNELS + i] =
>                                       
> si_build_alloca_undef(bld_base->base.gallivm,
>                                                             
> bld_base->base.vec_type,
>                                                             "temp");
>                       }
> +             } else {
> +                     LLVMValueRef idxs[2] = {
> +                             bld_base->uint_bld.zero,
> +                             NULL
> +                     };
> +                     for (i = 0; i < decl_size; ++i) {
> +                             idxs[1] = 
> lp_build_const_int32(bld_base->base.gallivm, i);
> +                             ctx->temps[first * TGSI_NUM_CHANNELS + i] =
> +                                     LLVMBuildGEP(builder, array_alloca, 
> idxs, 2, "temp");
> +                     }
>               }
>               break;
>       }
>       case TGSI_FILE_INPUT:
>       {
>               unsigned idx;
>               for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
>                       if (ctx->load_input)
>                               ctx->load_input(ctx, idx, decl);
>               }
> -- 
> 2.7.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to