Re: [Mesa-dev] [PATCH 08/19] gallium/radeon: clean up emit_declaration for temporaries

2016-08-10 Thread Tom Stellard
On Tue, Aug 09, 2016 at 12:36:37PM +0200, Nicolai Hähnle wrote:
> From: Nicolai Hähnle 
> 
> In the alloca'd array case, no longer create redundant and unused allocas
> for the individual elements; create getelementptrs instead.

Reviewed-by: Tom Stellard 
> ---
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c| 27 
> ++
>  1 file changed, 18 insertions(+), 9 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index d75311e..41f24d3 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -408,81 +408,90 @@ static LLVMValueRef si_build_alloca_undef(struct 
> gallivm_state *gallivm,
>   LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
>   LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
>   return ptr;
>  }
>  
>  static void emit_declaration(struct lp_build_tgsi_context *bld_base,
>const struct tgsi_full_declaration *decl)
>  {
>   struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
>   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> - unsigned first, last, i, idx;
> + unsigned first, last, i;
>   switch(decl->Declaration.File) {
>   case TGSI_FILE_ADDRESS:
>   {
>unsigned idx;
>   for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
>   unsigned chan;
>   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
>ctx->soa.addr[idx][chan] = 
> si_build_alloca_undef(
>   >gallivm,
>   ctx->soa.bld_base.uint_bld.elem_type, 
> "");
>   }
>   }
>   break;
>   }
>  
>   case TGSI_FILE_TEMPORARY:
>   {
> + LLVMValueRef array_alloca = NULL;
>   unsigned decl_size;
>   first = decl->Range.First;
>   last = decl->Range.Last;
>   decl_size = 4 * ((last - first) + 1);
>   if (decl->Declaration.Array) {
>   unsigned id = decl->Array.ArrayID - 1;
>   if (!ctx->arrays) {
>   int size = 
> bld_base->info->array_max[TGSI_FILE_TEMPORARY];
>   ctx->arrays = CALLOC(size, 
> sizeof(ctx->arrays[0]));
> - for (i = 0; i < size; ++i) {
> - assert(!ctx->arrays[i].alloca);}
>   }
>  
>   ctx->arrays[id].range = decl->Range;
>  
>   /* If the array is more than 16 elements (each element
>* is 32-bits), then store it in a vector.  Storing the
>* array in a vector will causes the compiler to store
>* the array in registers and access it using indirect
>* addressing.  16 is number of vector elements that
>* LLVM will store in a register.
>* FIXME: We shouldn't need to do this.  LLVM should be
>* smart enough to promote allocas int registers when
>* profitable.
>*/
>   if (decl_size > 16) {
> - ctx->arrays[id].alloca = 
> LLVMBuildAlloca(builder,
> + array_alloca = LLVMBuildAlloca(builder,
>   LLVMArrayType(bld_base->base.vec_type, 
> decl_size),"array");
> + ctx->arrays[id].alloca = array_alloca;
>   }
>   }
> - first = decl->Range.First;
> - last = decl->Range.Last;
> +
>   if (!ctx->temps_count) {
>   ctx->temps_count = 
> bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
>   ctx->temps = MALLOC(TGSI_NUM_CHANNELS * 
> ctx->temps_count * sizeof(LLVMValueRef));
>   }
> - for (idx = first; idx <= last; idx++) {
> - for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
> - ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
> + if (!array_alloca) {
> + for (i = 0; i < decl_size; ++i) {
> + ctx->temps[first * TGSI_NUM_CHANNELS + i] =
>   
> si_build_alloca_undef(bld_base->base.gallivm,
> 
> bld_base->base.vec_type,
> "temp");
>   }
> + } else {
> + LLVMValueRef idxs[2] = {
> + 

[Mesa-dev] [PATCH 08/19] gallium/radeon: clean up emit_declaration for temporaries

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

In the alloca'd array case, no longer create redundant and unused allocas
for the individual elements; create getelementptrs instead.
---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 27 ++
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index d75311e..41f24d3 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -408,81 +408,90 @@ static LLVMValueRef si_build_alloca_undef(struct 
gallivm_state *gallivm,
LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
return ptr;
 }
 
 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 const struct tgsi_full_declaration *decl)
 {
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-   unsigned first, last, i, idx;
+   unsigned first, last, i;
switch(decl->Declaration.File) {
case TGSI_FILE_ADDRESS:
{
 unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 ctx->soa.addr[idx][chan] = 
si_build_alloca_undef(
>gallivm,
ctx->soa.bld_base.uint_bld.elem_type, 
"");
}
}
break;
}
 
case TGSI_FILE_TEMPORARY:
{
+   LLVMValueRef array_alloca = NULL;
unsigned decl_size;
first = decl->Range.First;
last = decl->Range.Last;
decl_size = 4 * ((last - first) + 1);
if (decl->Declaration.Array) {
unsigned id = decl->Array.ArrayID - 1;
if (!ctx->arrays) {
int size = 
bld_base->info->array_max[TGSI_FILE_TEMPORARY];
ctx->arrays = CALLOC(size, 
sizeof(ctx->arrays[0]));
-   for (i = 0; i < size; ++i) {
-   assert(!ctx->arrays[i].alloca);}
}
 
ctx->arrays[id].range = decl->Range;
 
/* If the array is more than 16 elements (each element
 * is 32-bits), then store it in a vector.  Storing the
 * array in a vector will causes the compiler to store
 * the array in registers and access it using indirect
 * addressing.  16 is number of vector elements that
 * LLVM will store in a register.
 * FIXME: We shouldn't need to do this.  LLVM should be
 * smart enough to promote allocas int registers when
 * profitable.
 */
if (decl_size > 16) {
-   ctx->arrays[id].alloca = 
LLVMBuildAlloca(builder,
+   array_alloca = LLVMBuildAlloca(builder,
LLVMArrayType(bld_base->base.vec_type, 
decl_size),"array");
+   ctx->arrays[id].alloca = array_alloca;
}
}
-   first = decl->Range.First;
-   last = decl->Range.Last;
+
if (!ctx->temps_count) {
ctx->temps_count = 
bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
ctx->temps = MALLOC(TGSI_NUM_CHANNELS * 
ctx->temps_count * sizeof(LLVMValueRef));
}
-   for (idx = first; idx <= last; idx++) {
-   for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
-   ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
+   if (!array_alloca) {
+   for (i = 0; i < decl_size; ++i) {
+   ctx->temps[first * TGSI_NUM_CHANNELS + i] =

si_build_alloca_undef(bld_base->base.gallivm,
  
bld_base->base.vec_type,
  "temp");
}
+   } else {
+   LLVMValueRef idxs[2] = {
+   bld_base->uint_bld.zero,
+   NULL
+   };
+   for (i = 0; i < decl_size; ++i) {
+   idxs[1] =