On Thu, Jul 6, 2017 at 2:18 PM, Connor Abbott <cwabbo...@gmail.com> wrote: > On Thu, Jul 6, 2017 at 2:01 PM, Bas Nieuwenhuizen > <b...@basnieuwenhuizen.nl> wrote: >> On Thu, Jul 6, 2017 at 9:48 PM, Connor Abbott <conn...@valvesoftware.com> >> wrote: >>> From: Connor Abbott <cwabbo...@gmail.com> >>> >>> The old way was very TGSI-based, and couldn't handle indirect >>> dereferences at all. Instead, pass through the type information NIR has >> >> I think the old code should handle indirect derefs just fine? See the >> indir_index stuff. I'm kind of worried that LLVM doesn't promote >> variables with indirect derefs agrresively enough to registers. IIRC >> it only converts scalar arrays of up to 4 elements to vectors, while >> radv always promotes and IIRC radeonsi promotes for all variables with >> something like <= 20 dwords? > > Ah, true, I missed that while deleting it :). At the same time, if > LLVM isn't putting stuff into registers aggressively enough, that's > something that should be fixed in LLVM. Gathering everything into a > vector every time everything something is used, like what this code > does, is going to make it harder for LLVM to know what stuff aliases > and optimize accordingly since LLVM won't be able to use its normal > alias analysis. I don't know if LLVM will be able to optimize that > pattern as well.
After looking into it some more, I think LLVM won't promote allocas to registers at all when there are non-constant indices in the mix, and fixing it seems kinda involved. I guess a better solution for now would be to use nir_lower_locals_to_regs, since then each register/array can get its own vector. I'll respin the series to handle just shared variables, since that will fix the Feral issues, but I don't want to leave too much other churn for Nicolai. > >> >> >>> about local variables to LLVM, and translate NIR dereferences directly >>> into the equivalent GEP instructions in LLVM. >>> --- >>> src/amd/common/ac_nir_to_llvm.c | 204 >>> ++++++++++++++++++++++++++++------------ >>> 1 file changed, 146 insertions(+), 58 deletions(-) >>> >>> diff --git a/src/amd/common/ac_nir_to_llvm.c >>> b/src/amd/common/ac_nir_to_llvm.c >>> index e72747a..f42d214 100644 >>> --- a/src/amd/common/ac_nir_to_llvm.c >>> +++ b/src/amd/common/ac_nir_to_llvm.c >>> @@ -65,6 +65,7 @@ struct nir_to_llvm_context { >>> >>> struct hash_table *defs; >>> struct hash_table *phis; >>> + struct hash_table *vars; >>> >>> LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; >>> LLVMValueRef ring_offsets; >>> @@ -157,8 +158,6 @@ struct nir_to_llvm_context { >>> LLVMValueRef shared_memory; >>> uint64_t input_mask; >>> uint64_t output_mask; >>> - int num_locals; >>> - LLVMValueRef *locals; >>> uint8_t num_output_clips; >>> uint8_t num_output_culls; >>> >>> @@ -2905,6 +2904,45 @@ load_gs_input(struct nir_to_llvm_context *ctx, >>> return result; >>> } >>> >>> +static LLVMValueRef >>> +build_gep_for_deref(struct nir_to_llvm_context *ctx, >>> + nir_deref_var *deref) >>> +{ >>> + struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, >>> deref->var); >>> + assert(entry->data); >>> + LLVMValueRef val = entry->data; >>> + nir_deref *tail = deref->deref.child; >>> + while (tail != NULL) { >>> + LLVMValueRef offset; >>> + switch (tail->deref_type) { >>> + case nir_deref_type_array: { >>> + nir_deref_array *array = nir_deref_as_array(tail); >>> + offset = LLVMConstInt(ctx->i32, array->base_offset, >>> 0); >>> + if (array->deref_array_type == >>> + nir_deref_array_type_indirect) { >>> + offset = LLVMBuildAdd(ctx->builder, offset, >>> + get_src(ctx, >>> + >>> array->indirect), >>> + ""); >>> + } >>> + break; >>> + } >>> + case nir_deref_type_struct: { >>> + nir_deref_struct *deref_struct = >>> + nir_deref_as_struct(tail); >>> + offset = LLVMConstInt(ctx->i32, >>> + deref_struct->index, 0); >>> + break; >>> + } >>> + default: >>> + unreachable("bad deref type"); >>> + } >>> + val = ac_build_gep0(&ctx->ac, val, offset); >>> + tail = tail->child; >>> + } >>> + return val; >>> +} >>> + >>> static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, >>> nir_intrinsic_instr *instr) >>> { >>> @@ -2948,24 +2986,14 @@ static LLVMValueRef visit_load_var(struct >>> nir_to_llvm_context *ctx, >>> values[chan] = ctx->inputs[idx + chan + >>> const_index * 4]; >>> } >>> break; >>> - case nir_var_local: >>> - for (unsigned chan = 0; chan < ve; chan++) { >>> - if (indir_index) { >>> - unsigned count = glsl_count_attribute_slots( >>> - instr->variables[0]->var->type, >>> false); >>> - count -= chan / 4; >>> - LLVMValueRef tmp_vec = >>> ac_build_gather_values_extended( >>> - &ctx->ac, ctx->locals + idx >>> + chan, count, >>> - 4, true); >>> - >>> - values[chan] = >>> LLVMBuildExtractElement(ctx->builder, >>> - >>> tmp_vec, >>> - >>> indir_index, ""); >>> - } else { >>> - values[chan] = LLVMBuildLoad(ctx->builder, >>> ctx->locals[idx + chan + const_index * 4], ""); >>> - } >>> - } >>> - break; >>> + case nir_var_local: { >>> + LLVMValueRef address = build_gep_for_deref(ctx, >>> + >>> instr->variables[0]); >>> + LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, ""); >>> + return LLVMBuildBitCast(ctx->builder, val, >>> + get_def_type(ctx, &instr->dest.ssa), >>> + ""); >>> + } >>> case nir_var_shader_out: >>> if (ctx->stage == MESA_SHADER_TESS_CTRL) >>> return load_tcs_output(ctx, instr); >>> @@ -3079,31 +3107,36 @@ visit_store_var(struct nir_to_llvm_context *ctx, >>> } >>> } >>> break; >>> - case nir_var_local: >>> - for (unsigned chan = 0; chan < 8; chan++) { >>> - if (!(writemask & (1 << chan))) >>> - continue; >>> - >>> - value = llvm_extract_elem(ctx, src, chan); >>> - if (indir_index) { >>> - unsigned count = glsl_count_attribute_slots( >>> - instr->variables[0]->var->type, >>> false); >>> - count -= chan / 4; >>> - LLVMValueRef tmp_vec = >>> ac_build_gather_values_extended( >>> - &ctx->ac, ctx->locals + idx + chan, >>> count, >>> - 4, true); >>> - >>> - tmp_vec = >>> LLVMBuildInsertElement(ctx->builder, tmp_vec, >>> - value, >>> indir_index, ""); >>> - build_store_values_extended(ctx, >>> ctx->locals + idx + chan, >>> - count, 4, >>> tmp_vec); >>> - } else { >>> - temp_ptr = ctx->locals[idx + chan + >>> const_index * 4]; >>> - >>> - LLVMBuildStore(ctx->builder, value, >>> temp_ptr); >>> + case nir_var_local: { >>> + int writemask = instr->const_index[0]; >>> + LLVMValueRef address = build_gep_for_deref(ctx, >>> + >>> instr->variables[0]); >>> + LLVMValueRef val = get_src(ctx, instr->src[0]); >>> + unsigned components = >>> + glsl_get_vector_elements( >>> + >>> nir_deref_tail(&instr->variables[0]->deref)->type); >>> + if (writemask == (1 << components) - 1) { >>> + val = LLVMBuildBitCast( >>> + ctx->builder, val, >>> + LLVMGetElementType(LLVMTypeOf(address)), ""); >>> + LLVMBuildStore(ctx->builder, val, address); >>> + } else { >>> + for (unsigned chan = 0; chan < 4; chan++) { >>> + if (!(writemask & (1 << chan))) >>> + continue; >>> + LLVMValueRef ptr = >>> + LLVMBuildStructGEP(ctx->builder, >>> + address, chan, >>> ""); >>> + LLVMValueRef src = llvm_extract_elem(ctx, >>> val, >>> + chan); >>> + src = LLVMBuildBitCast( >>> + ctx->builder, src, >>> + LLVMGetElementType(LLVMTypeOf(ptr)), ""); >>> + LLVMBuildStore(ctx->builder, src, ptr); >>> } >>> } >>> break; >>> + } >>> case nir_var_shared: { >>> LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, >>> ctx->i32); >>> >>> @@ -5005,26 +5038,79 @@ handle_shader_output_decl(struct >>> nir_to_llvm_context *ctx, >>> ctx->output_mask |= mask_attribs; >>> } >>> >>> +static LLVMTypeRef >>> +glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx, >>> + enum glsl_base_type type) >>> +{ >>> + switch (type) { >>> + case GLSL_TYPE_INT: >>> + case GLSL_TYPE_UINT: >>> + case GLSL_TYPE_BOOL: >>> + case GLSL_TYPE_SUBROUTINE: >>> + return ctx->i32; >>> + case GLSL_TYPE_FLOAT: /* TODO handle mediump */ >>> + return ctx->f32; >>> + case GLSL_TYPE_INT64: >>> + case GLSL_TYPE_UINT64: >>> + return ctx->i64; >>> + case GLSL_TYPE_DOUBLE: >>> + return ctx->f64; >>> + default: >>> + unreachable("unknown GLSL type"); >>> + } >>> +} >>> + >>> +static LLVMTypeRef >>> +glsl_to_llvm_type(struct nir_to_llvm_context *ctx, >>> + const struct glsl_type *type) >>> +{ >>> + if (glsl_type_is_scalar(type)) { >>> + return glsl_base_to_llvm_type(ctx, >>> glsl_get_base_type(type)); >>> + } >>> + >>> + if (glsl_type_is_vector(type)) { >>> + return LLVMVectorType( >>> + glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)), >>> + glsl_get_vector_elements(type)); >>> + } >>> + >>> + if (glsl_type_is_matrix(type)) { >>> + return LLVMArrayType( >>> + glsl_to_llvm_type(ctx, glsl_get_column_type(type)), >>> + glsl_get_matrix_columns(type)); >>> + } >>> + >>> + if (glsl_type_is_array(type)) { >>> + return LLVMArrayType( >>> + glsl_to_llvm_type(ctx, glsl_get_array_element(type)), >>> + glsl_get_length(type)); >>> + } >>> + >>> + assert(glsl_type_is_struct(type)); >>> + >>> + LLVMTypeRef member_types[glsl_get_length(type)]; >>> + >>> + for (unsigned i = 0; i < glsl_get_length(type); i++) { >>> + member_types[i] = >>> + glsl_to_llvm_type(ctx, >>> + glsl_get_struct_field(type, i)); >>> + } >>> + >>> + return LLVMStructTypeInContext(ctx->context, member_types, >>> + glsl_get_length(type), false); >>> +} >>> + >>> static void >>> setup_locals(struct nir_to_llvm_context *ctx, >>> struct nir_function *func) >>> { >>> - int i, j; >>> - ctx->num_locals = 0; >>> nir_foreach_variable(variable, &func->impl->locals) { >>> - unsigned attrib_count = >>> glsl_count_attribute_slots(variable->type, false); >>> - variable->data.driver_location = ctx->num_locals * 4; >>> - ctx->num_locals += attrib_count; >>> - } >>> - ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef)); >>> - if (!ctx->locals) >>> - return; >>> - >>> - for (i = 0; i < ctx->num_locals; i++) { >>> - for (j = 0; j < 4; j++) { >>> - ctx->locals[i * 4 + j] = >>> - si_build_alloca_undef(ctx, ctx->f32, >>> "temp"); >>> - } >>> + LLVMValueRef local = >>> + si_build_alloca_undef(ctx, >>> + glsl_to_llvm_type(ctx, >>> + >>> variable->type), >>> + variable->name ? >>> variable->name : ""); >>> + _mesa_hash_table_insert(ctx->vars, variable, local); >>> } >>> } >>> >>> @@ -6034,6 +6120,8 @@ LLVMModuleRef >>> ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, >>> _mesa_key_pointer_equal); >>> ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, >>> _mesa_key_pointer_equal); >>> + ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, >>> + _mesa_key_pointer_equal); >>> >>> func = (struct nir_function *)exec_list_get_head(&nir->functions); >>> >>> @@ -6048,9 +6136,9 @@ LLVMModuleRef >>> ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, >>> ac_llvm_finalize_module(&ctx); >>> >>> ac_nir_eliminate_const_vs_outputs(&ctx); >>> - free(ctx.locals); >>> ralloc_free(ctx.defs); >>> ralloc_free(ctx.phis); >>> + ralloc_free(ctx.vars); >>> >>> if (nir->stage == MESA_SHADER_GEOMETRY) { >>> unsigned addclip = ctx.num_output_clips + >>> ctx.num_output_culls > 4; >>> -- >>> 2.9.4 >>> >>> _______________________________________________ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev