On Thu, Jul 6, 2017 at 9:48 PM, Connor Abbott <conn...@valvesoftware.com> wrote: > From: Connor Abbott <cwabbo...@gmail.com> > > The old way was very TGSI-based, and couldn't handle indirect > dereferences at all. Instead, pass through the type information NIR has
I think the old code should handle indirect derefs just fine? See the indir_index stuff. I'm kind of worried that LLVM doesn't promote variables with indirect derefs agrresively enough to registers. IIRC it only converts scalar arrays of up to 4 elements to vectors, while radv always promotes and IIRC radeonsi promotes for all variables with something like <= 20 dwords? > about local variables to LLVM, and translate NIR dereferences directly > into the equivalent GEP instructions in LLVM. > --- > src/amd/common/ac_nir_to_llvm.c | 204 > ++++++++++++++++++++++++++++------------ > 1 file changed, 146 insertions(+), 58 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index e72747a..f42d214 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -65,6 +65,7 @@ struct nir_to_llvm_context { > > struct hash_table *defs; > struct hash_table *phis; > + struct hash_table *vars; > > LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; > LLVMValueRef ring_offsets; > @@ -157,8 +158,6 @@ struct nir_to_llvm_context { > LLVMValueRef shared_memory; > uint64_t input_mask; > uint64_t output_mask; > - int num_locals; > - LLVMValueRef *locals; > uint8_t num_output_clips; > uint8_t num_output_culls; > > @@ -2905,6 +2904,45 @@ load_gs_input(struct nir_to_llvm_context *ctx, > return result; > } > > +static LLVMValueRef > +build_gep_for_deref(struct nir_to_llvm_context *ctx, > + nir_deref_var *deref) > +{ > + struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, > deref->var); > + assert(entry->data); > + LLVMValueRef val = entry->data; > + nir_deref *tail = deref->deref.child; > + while (tail != NULL) { > + LLVMValueRef offset; > + switch (tail->deref_type) { > + case nir_deref_type_array: { > + nir_deref_array *array = nir_deref_as_array(tail); > + offset = LLVMConstInt(ctx->i32, array->base_offset, > 0); > + if (array->deref_array_type == > + nir_deref_array_type_indirect) { > + offset = LLVMBuildAdd(ctx->builder, offset, > + get_src(ctx, > + > array->indirect), > + ""); > + } > + break; > + } > + case nir_deref_type_struct: { > + nir_deref_struct *deref_struct = > + nir_deref_as_struct(tail); > + offset = LLVMConstInt(ctx->i32, > + deref_struct->index, 0); > + break; > + } > + default: > + unreachable("bad deref type"); > + } > + val = ac_build_gep0(&ctx->ac, val, offset); > + tail = tail->child; > + } > + return val; > +} > + > static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, > nir_intrinsic_instr *instr) > { > @@ -2948,24 +2986,14 @@ static LLVMValueRef visit_load_var(struct > nir_to_llvm_context *ctx, > values[chan] = ctx->inputs[idx + chan + > const_index * 4]; > } > break; > - case nir_var_local: > - for (unsigned chan = 0; chan < ve; chan++) { > - if (indir_index) { > - unsigned count = glsl_count_attribute_slots( > - instr->variables[0]->var->type, > false); > - count -= chan / 4; > - LLVMValueRef tmp_vec = > ac_build_gather_values_extended( > - &ctx->ac, ctx->locals + idx + > chan, count, > - 4, true); > - > - values[chan] = > LLVMBuildExtractElement(ctx->builder, > - > tmp_vec, > - > indir_index, ""); > - } else { > - values[chan] = LLVMBuildLoad(ctx->builder, > ctx->locals[idx + chan + const_index * 4], ""); > - } > - } > - break; > + case nir_var_local: { > + LLVMValueRef address = build_gep_for_deref(ctx, > + > instr->variables[0]); > + LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, ""); > + return LLVMBuildBitCast(ctx->builder, val, > + get_def_type(ctx, &instr->dest.ssa), > + ""); > + } > case nir_var_shader_out: > if (ctx->stage == MESA_SHADER_TESS_CTRL) > return load_tcs_output(ctx, instr); > @@ -3079,31 +3107,36 @@ visit_store_var(struct nir_to_llvm_context *ctx, > } > } > break; > - case nir_var_local: > - for (unsigned chan = 0; chan < 8; chan++) { > - if (!(writemask & (1 << chan))) > - continue; > - > - value = llvm_extract_elem(ctx, src, chan); > - if (indir_index) { > - unsigned count = glsl_count_attribute_slots( > - instr->variables[0]->var->type, > false); > - count -= chan / 4; > - LLVMValueRef tmp_vec = > ac_build_gather_values_extended( > - &ctx->ac, ctx->locals + idx + chan, > count, > - 4, true); > - > - tmp_vec = > LLVMBuildInsertElement(ctx->builder, tmp_vec, > - value, > indir_index, ""); > - build_store_values_extended(ctx, ctx->locals > + idx + chan, > - count, 4, > tmp_vec); > - } else { > - temp_ptr = ctx->locals[idx + chan + > const_index * 4]; > - > - LLVMBuildStore(ctx->builder, value, temp_ptr); > + case nir_var_local: { > + int writemask = instr->const_index[0]; > + LLVMValueRef address = build_gep_for_deref(ctx, > + > instr->variables[0]); > + LLVMValueRef val = get_src(ctx, instr->src[0]); > + unsigned components = > + glsl_get_vector_elements( > + nir_deref_tail(&instr->variables[0]->deref)->type); > + if (writemask == (1 << components) - 1) { > + val = LLVMBuildBitCast( > + ctx->builder, val, > + LLVMGetElementType(LLVMTypeOf(address)), ""); > + LLVMBuildStore(ctx->builder, val, address); > + } else { > + for (unsigned chan = 0; chan < 4; chan++) { > + if (!(writemask & (1 << chan))) > + continue; > + LLVMValueRef ptr = > + LLVMBuildStructGEP(ctx->builder, > + address, chan, ""); > + LLVMValueRef src = llvm_extract_elem(ctx, val, > + chan); > + src = LLVMBuildBitCast( > + ctx->builder, src, > + LLVMGetElementType(LLVMTypeOf(ptr)), ""); > + LLVMBuildStore(ctx->builder, src, ptr); > } > } > break; > + } > case nir_var_shared: { > LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); > > @@ -5005,26 +5038,79 @@ handle_shader_output_decl(struct nir_to_llvm_context > *ctx, > ctx->output_mask |= mask_attribs; > } > > +static LLVMTypeRef > +glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx, > + enum glsl_base_type type) > +{ > + switch (type) { > + case GLSL_TYPE_INT: > + case GLSL_TYPE_UINT: > + case GLSL_TYPE_BOOL: > + case GLSL_TYPE_SUBROUTINE: > + return ctx->i32; > + case GLSL_TYPE_FLOAT: /* TODO handle mediump */ > + return ctx->f32; > + case GLSL_TYPE_INT64: > + case GLSL_TYPE_UINT64: > + return ctx->i64; > + case GLSL_TYPE_DOUBLE: > + return ctx->f64; > + default: > + unreachable("unknown GLSL type"); > + } > +} > + > +static LLVMTypeRef > +glsl_to_llvm_type(struct nir_to_llvm_context *ctx, > + const struct glsl_type *type) > +{ > + if (glsl_type_is_scalar(type)) { > + return glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)); > + } > + > + if (glsl_type_is_vector(type)) { > + return LLVMVectorType( > + glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)), > + glsl_get_vector_elements(type)); > + } > + > + if (glsl_type_is_matrix(type)) { > + return LLVMArrayType( > + glsl_to_llvm_type(ctx, glsl_get_column_type(type)), > + glsl_get_matrix_columns(type)); > + } > + > + if (glsl_type_is_array(type)) { > + return LLVMArrayType( > + glsl_to_llvm_type(ctx, glsl_get_array_element(type)), > + glsl_get_length(type)); > + } > + > + assert(glsl_type_is_struct(type)); > + > + LLVMTypeRef member_types[glsl_get_length(type)]; > + > + for (unsigned i = 0; i < glsl_get_length(type); i++) { > + member_types[i] = > + glsl_to_llvm_type(ctx, > + glsl_get_struct_field(type, i)); > + } > + > + return LLVMStructTypeInContext(ctx->context, member_types, > + glsl_get_length(type), false); > +} > + > static void > setup_locals(struct nir_to_llvm_context *ctx, > struct nir_function *func) > { > - int i, j; > - ctx->num_locals = 0; > nir_foreach_variable(variable, &func->impl->locals) { > - unsigned attrib_count = > glsl_count_attribute_slots(variable->type, false); > - variable->data.driver_location = ctx->num_locals * 4; > - ctx->num_locals += attrib_count; > - } > - ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef)); > - if (!ctx->locals) > - return; > - > - for (i = 0; i < ctx->num_locals; i++) { > - for (j = 0; j < 4; j++) { > - ctx->locals[i * 4 + j] = > - si_build_alloca_undef(ctx, ctx->f32, "temp"); > - } > + LLVMValueRef local = > + si_build_alloca_undef(ctx, > + glsl_to_llvm_type(ctx, > + > variable->type), > + variable->name ? variable->name > : ""); > + _mesa_hash_table_insert(ctx->vars, variable, local); > } > } > > @@ -6034,6 +6120,8 @@ LLVMModuleRef > ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, > _mesa_key_pointer_equal); > ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, > _mesa_key_pointer_equal); > + ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, > + _mesa_key_pointer_equal); > > func = (struct nir_function *)exec_list_get_head(&nir->functions); > > @@ -6048,9 +6136,9 @@ LLVMModuleRef > ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, > ac_llvm_finalize_module(&ctx); > > ac_nir_eliminate_const_vs_outputs(&ctx); > - free(ctx.locals); > ralloc_free(ctx.defs); > ralloc_free(ctx.phis); > + ralloc_free(ctx.vars); > > if (nir->stage == MESA_SHADER_GEOMETRY) { > unsigned addclip = ctx.num_output_clips + > ctx.num_output_culls > 4; > -- > 2.9.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev