Re: [Mesa-dev] [PATCH] ac/nir: rewrite shared variable handling (v2)
Pushed! On Thu, Jul 13, 2017 at 3:39 PM, Connor Abbott wrote: > From: Connor Abbott > > Translate the NIR variables directly to LLVM instead of lowering to a > TGSI-style giant array of vec4's and then back to a variable. This > should fix indirect dereferences, make shared variables more tightly > packed, and make LLVM's alias analysis more precise. This should fix an > upcoming Feral title, which has a compute shader that was failing to > compile because the extra padding made us run out of LDS space. > > v2: Combine the previous two patches into one, only use this for shared > variables for now until LLVM becomes smarter. > > Cc: Alex Smith > Reviewed-by: Bas Nieuwenhuizen > --- > Alex: I made this by squashing together two patches from my series, so it > should still work, but I'd still like to get your Tested-by before I push to > make sure I didn't screw something up. > > src/amd/common/ac_nir_to_llvm.c | 245 > ++-- > 1 file changed, 158 insertions(+), 87 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 9223310..9a69066 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -65,6 +65,7 @@ struct nir_to_llvm_context { > > struct hash_table *defs; > struct hash_table *phis; > + struct hash_table *vars; > > LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; > LLVMValueRef ring_offsets; > @@ -154,7 +155,6 @@ struct nir_to_llvm_context { > LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; > LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; > > - LLVMValueRef shared_memory; > uint64_t input_mask; > uint64_t output_mask; > int num_locals; > @@ -387,23 +387,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, > int num_elements) >CONST_ADDR_SPACE); > } > > -static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx, > - int idx, > - LLVMTypeRef type) > -{ > - LLVMValueRef offset; > - LLVMValueRef ptr; > - int addr_space; > - > - offset = LLVMConstInt(ctx->i32, 
idx * 16, false); > - > - ptr = ctx->shared_memory; > - ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, ""); > - addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); > - ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, > addr_space), ""); > - return ptr; > -} > - > static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, > LLVMTypeRef t) > { > if (t == ctx->f16 || t == ctx->i16) > @@ -2905,6 +2888,45 @@ load_gs_input(struct nir_to_llvm_context *ctx, > return result; > } > > +static LLVMValueRef > +build_gep_for_deref(struct nir_to_llvm_context *ctx, > + nir_deref_var *deref) > +{ > + struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, > deref->var); > + assert(entry->data); > + LLVMValueRef val = entry->data; > + nir_deref *tail = deref->deref.child; > + while (tail != NULL) { > + LLVMValueRef offset; > + switch (tail->deref_type) { > + case nir_deref_type_array: { > + nir_deref_array *array = nir_deref_as_array(tail); > + offset = LLVMConstInt(ctx->i32, array->base_offset, > 0); > + if (array->deref_array_type == > + nir_deref_array_type_indirect) { > + offset = LLVMBuildAdd(ctx->builder, offset, > + get_src(ctx, > + > array->indirect), > + ""); > + } > + break; > + } > + case nir_deref_type_struct: { > + nir_deref_struct *deref_struct = > + nir_deref_as_struct(tail); > + offset = LLVMConstInt(ctx->i32, > + deref_struct->index, 0); > + break; > + } > + default: > + unreachable("bad deref type"); > + } > + val = ac_build_gep0(&ctx->ac, val, offset); > + tail = tail->child; > + } > + return val; > +} > + > static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, >nir_intrinsic_instr *instr) > { > @@ -2966,6 +2988,14 @@ static LLVMValueRef visit_load_var(struct > nir_to_llvm_context *ctx, > } > } > break; > + case
Re: [Mesa-dev] [PATCH] ac/nir: rewrite shared variable handling (v2)
On 14.07.2017 00:39, Connor Abbott wrote: From: Connor Abbott Translate the NIR variables directly to LLVM instead of lowering to a TGSI-style giant array of vec4's and then back to a variable. This should fix indirect dereferences, make shared variables more tightly packed, and make LLVM's alias analysis more precise. This should fix an upcoming Feral title, which has a compute shader that was failing to compile because the extra padding made us run out of LDS space. v2: Combine the previous two patches into one, only use this for shared variables for now until LLVM becomes smarter. Cc: Alex Smith Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Nicolai Hähnle --- Alex: I made this by squashing together two patches from my series, so it should still work, but I'd still like to get your Tested-by before I push to make sure I didn't screw something up. src/amd/common/ac_nir_to_llvm.c | 245 ++-- 1 file changed, 158 insertions(+), 87 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 9223310..9a69066 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -65,6 +65,7 @@ struct nir_to_llvm_context { struct hash_table *defs; struct hash_table *phis; + struct hash_table *vars; LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; LLVMValueRef ring_offsets; @@ -154,7 +155,6 @@ struct nir_to_llvm_context { LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; - LLVMValueRef shared_memory; uint64_t input_mask; uint64_t output_mask; int num_locals; @@ -387,23 +387,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) CONST_ADDR_SPACE); } -static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx, - int idx, - LLVMTypeRef type) -{ - LLVMValueRef offset; - LLVMValueRef ptr; - int addr_space; - - offset = LLVMConstInt(ctx->i32, idx * 16, false); - - ptr = ctx->shared_memory; - ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, ""); - 
addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); - ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), ""); - return ptr; -} - static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) { if (t == ctx->f16 || t == ctx->i16) @@ -2905,6 +2888,45 @@ load_gs_input(struct nir_to_llvm_context *ctx, return result; } +static LLVMValueRef +build_gep_for_deref(struct nir_to_llvm_context *ctx, + nir_deref_var *deref) +{ + struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var); + assert(entry->data); + LLVMValueRef val = entry->data; + nir_deref *tail = deref->deref.child; + while (tail != NULL) { + LLVMValueRef offset; + switch (tail->deref_type) { + case nir_deref_type_array: { + nir_deref_array *array = nir_deref_as_array(tail); + offset = LLVMConstInt(ctx->i32, array->base_offset, 0); + if (array->deref_array_type == + nir_deref_array_type_indirect) { + offset = LLVMBuildAdd(ctx->builder, offset, + get_src(ctx, + array->indirect), + ""); + } + break; + } + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = + nir_deref_as_struct(tail); + offset = LLVMConstInt(ctx->i32, + deref_struct->index, 0); + break; + } + default: + unreachable("bad deref type"); + } + val = ac_build_gep0(&ctx->ac, val, offset); + tail = tail->child; + } + return val; +} + static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { @@ -2966,6 +2988,14 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, } } break; + case nir_var_shared: { + LLVMValueRef address = build_gep_for_deref(ctx, + instr->variables[0]); + LLVMValueRef val =
Re: [Mesa-dev] [PATCH] ac/nir: rewrite shared variable handling (v2)
On 13 July 2017 at 23:39, Connor Abbott wrote: > From: Connor Abbott > > Translate the NIR variables directly to LLVM instead of lowering to a > TGSI-style giant array of vec4's and then back to a variable. This > should fix indirect dereferences, make shared variables more tightly > packed, and make LLVM's alias analysis more precise. This should fix an > upcoming Feral title, which has a compute shader that was failing to > compile because the extra padding made us run out of LDS space. > > v2: Combine the previous two patches into one, only use this for shared > variables for now until LLVM becomes smarter. > > Cc: Alex Smith > Reviewed-by: Bas Nieuwenhuizen > --- > Alex: I made this by squashing together two patches from my series, so it > should still work, but I'd still like to get your Tested-by before I push to > make sure I didn't screw something up. Looks good to me, thanks! Tested-by: Alex Smith > > src/amd/common/ac_nir_to_llvm.c | 245 > ++-- > 1 file changed, 158 insertions(+), 87 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 9223310..9a69066 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -65,6 +65,7 @@ struct nir_to_llvm_context { > > struct hash_table *defs; > struct hash_table *phis; > + struct hash_table *vars; > > LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; > LLVMValueRef ring_offsets; > @@ -154,7 +155,6 @@ struct nir_to_llvm_context { > LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; > LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; > > - LLVMValueRef shared_memory; > uint64_t input_mask; > uint64_t output_mask; > int num_locals; > @@ -387,23 +387,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, > int num_elements) >CONST_ADDR_SPACE); > } > > -static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx, > - int idx, > - LLVMTypeRef type) > -{ > - LLVMValueRef offset; > - LLVMValueRef ptr; > - int addr_space; > - > - 
offset = LLVMConstInt(ctx->i32, idx * 16, false); > - > - ptr = ctx->shared_memory; > - ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, ""); > - addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); > - ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, > addr_space), ""); > - return ptr; > -} > - > static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, > LLVMTypeRef t) > { > if (t == ctx->f16 || t == ctx->i16) > @@ -2905,6 +2888,45 @@ load_gs_input(struct nir_to_llvm_context *ctx, > return result; > } > > +static LLVMValueRef > +build_gep_for_deref(struct nir_to_llvm_context *ctx, > + nir_deref_var *deref) > +{ > + struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, > deref->var); > + assert(entry->data); > + LLVMValueRef val = entry->data; > + nir_deref *tail = deref->deref.child; > + while (tail != NULL) { > + LLVMValueRef offset; > + switch (tail->deref_type) { > + case nir_deref_type_array: { > + nir_deref_array *array = nir_deref_as_array(tail); > + offset = LLVMConstInt(ctx->i32, array->base_offset, > 0); > + if (array->deref_array_type == > + nir_deref_array_type_indirect) { > + offset = LLVMBuildAdd(ctx->builder, offset, > + get_src(ctx, > + > array->indirect), > + ""); > + } > + break; > + } > + case nir_deref_type_struct: { > + nir_deref_struct *deref_struct = > + nir_deref_as_struct(tail); > + offset = LLVMConstInt(ctx->i32, > + deref_struct->index, 0); > + break; > + } > + default: > + unreachable("bad deref type"); > + } > + val = ac_build_gep0(&ctx->ac, val, offset); > + tail = tail->child; > + } > + return val; > +} > + > static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, >nir_intrinsic_instr *instr) > { > @@ -2966,6 +2988,14 @@ static LLVMValueRef visit_load_var(struct > nir_to_llvm_context *ctx, >
[Mesa-dev] [PATCH] ac/nir: rewrite shared variable handling (v2)
From: Connor Abbott Translate the NIR variables directly to LLVM instead of lowering to a TGSI-style giant array of vec4's and then back to a variable. This should fix indirect dereferences, make shared variables more tightly packed, and make LLVM's alias analysis more precise. This should fix an upcoming Feral title, which has a compute shader that was failing to compile because the extra padding made us run out of LDS space. v2: Combine the previous two patches into one, only use this for shared variables for now until LLVM becomes smarter. Cc: Alex Smith Reviewed-by: Bas Nieuwenhuizen --- Alex: I made this by squashing together two patches from my series, so it should still work, but I'd still like to get your Tested-by before I push to make sure I didn't screw something up. src/amd/common/ac_nir_to_llvm.c | 245 ++-- 1 file changed, 158 insertions(+), 87 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 9223310..9a69066 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -65,6 +65,7 @@ struct nir_to_llvm_context { struct hash_table *defs; struct hash_table *phis; + struct hash_table *vars; LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; LLVMValueRef ring_offsets; @@ -154,7 +155,6 @@ struct nir_to_llvm_context { LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; - LLVMValueRef shared_memory; uint64_t input_mask; uint64_t output_mask; int num_locals; @@ -387,23 +387,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) CONST_ADDR_SPACE); } -static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx, - int idx, - LLVMTypeRef type) -{ - LLVMValueRef offset; - LLVMValueRef ptr; - int addr_space; - - offset = LLVMConstInt(ctx->i32, idx * 16, false); - - ptr = ctx->shared_memory; - ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, ""); - addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); - ptr = 
LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), ""); - return ptr; -} - static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) { if (t == ctx->f16 || t == ctx->i16) @@ -2905,6 +2888,45 @@ load_gs_input(struct nir_to_llvm_context *ctx, return result; } +static LLVMValueRef +build_gep_for_deref(struct nir_to_llvm_context *ctx, + nir_deref_var *deref) +{ + struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var); + assert(entry->data); + LLVMValueRef val = entry->data; + nir_deref *tail = deref->deref.child; + while (tail != NULL) { + LLVMValueRef offset; + switch (tail->deref_type) { + case nir_deref_type_array: { + nir_deref_array *array = nir_deref_as_array(tail); + offset = LLVMConstInt(ctx->i32, array->base_offset, 0); + if (array->deref_array_type == + nir_deref_array_type_indirect) { + offset = LLVMBuildAdd(ctx->builder, offset, + get_src(ctx, + array->indirect), + ""); + } + break; + } + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = + nir_deref_as_struct(tail); + offset = LLVMConstInt(ctx->i32, + deref_struct->index, 0); + break; + } + default: + unreachable("bad deref type"); + } + val = ac_build_gep0(&ctx->ac, val, offset); + tail = tail->child; + } + return val; +} + static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { @@ -2966,6 +2988,14 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, } } break; + case nir_var_shared: { + LLVMValueRef address = build_gep_for_deref(ctx, + instr->variables[0]); + LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, ""); + return LLVMBuildBitCast(ctx->builder, val, +