From: Nicolai Hähnle <[email protected]>
When a TGSI temporary array is backed by an alloca'd LLVM array, no longer
create redundant and unused allocas for the individual elements; create
getelementptrs into the array alloca instead.
---
.../drivers/radeon/radeon_setup_tgsi_llvm.c | 27 ++++++++++++++--------
1 file changed, 18 insertions(+), 9 deletions(-)
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index d75311e..41f24d3 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -408,81 +408,90 @@ static LLVMValueRef si_build_alloca_undef(struct gallivm_state *gallivm,
LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
return ptr;
}
static void emit_declaration(struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_declaration *decl)
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- unsigned first, last, i, idx;
+ unsigned first, last, i;
switch(decl->Declaration.File) {
case TGSI_FILE_ADDRESS:
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
ctx->soa.addr[idx][chan] =
si_build_alloca_undef(
&ctx->gallivm,
ctx->soa.bld_base.uint_bld.elem_type,
"");
}
}
break;
}
case TGSI_FILE_TEMPORARY:
{
+ LLVMValueRef array_alloca = NULL;
unsigned decl_size;
first = decl->Range.First;
last = decl->Range.Last;
decl_size = 4 * ((last - first) + 1);
if (decl->Declaration.Array) {
unsigned id = decl->Array.ArrayID - 1;
if (!ctx->arrays) {
int size =
bld_base->info->array_max[TGSI_FILE_TEMPORARY];
ctx->arrays = CALLOC(size,
sizeof(ctx->arrays[0]));
- for (i = 0; i < size; ++i) {
- assert(!ctx->arrays[i].alloca);}
}
ctx->arrays[id].range = decl->Range;
/* If the array is more than 16 elements (each element
* is 32-bits), then store it in a vector. Storing the
* array in a vector will causes the compiler to store
* the array in registers and access it using indirect
* addressing. 16 is number of vector elements that
* LLVM will store in a register.
* FIXME: We shouldn't need to do this. LLVM should be
* smart enough to promote allocas int registers when
* profitable.
*/
if (decl_size > 16) {
- ctx->arrays[id].alloca = LLVMBuildAlloca(builder,
+ array_alloca = LLVMBuildAlloca(builder,
LLVMArrayType(bld_base->base.vec_type,
decl_size),"array");
+ ctx->arrays[id].alloca = array_alloca;
}
}
- first = decl->Range.First;
- last = decl->Range.Last;
+
if (!ctx->temps_count) {
ctx->temps_count =
bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
ctx->temps = MALLOC(TGSI_NUM_CHANNELS *
ctx->temps_count * sizeof(LLVMValueRef));
}
- for (idx = first; idx <= last; idx++) {
- for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
- ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
+ if (!array_alloca) {
+ for (i = 0; i < decl_size; ++i) {
+ ctx->temps[first * TGSI_NUM_CHANNELS + i] =
si_build_alloca_undef(bld_base->base.gallivm,
bld_base->base.vec_type,
"temp");
}
+ } else {
+ LLVMValueRef idxs[2] = {
+ bld_base->uint_bld.zero,
+ NULL
+ };
+ for (i = 0; i < decl_size; ++i) {
+ idxs[1] = lp_build_const_int32(bld_base->base.gallivm, i);
+ ctx->temps[first * TGSI_NUM_CHANNELS + i] =
+ LLVMBuildGEP(builder, array_alloca, idxs, 2, "temp");
+ }
}
break;
}
case TGSI_FILE_INPUT:
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
if (ctx->load_input)
ctx->load_input(ctx, idx, decl);
}
--
2.7.4
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev