From: Connor Abbott <cwabbo...@gmail.com>

Similar to before, do the direct NIR->LLVM translation instead of
lowering to an array and then back to a variable. This should fix
indirect dereferences, make shared variables more tightly packed, and
make LLVM's alias analysis more precise.
---
 src/amd/common/ac_nir_to_llvm.c | 116 ++++++++--------------------------------
 1 file changed, 23 insertions(+), 93 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index f42d214..743cc1d 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -155,7 +155,6 @@ struct nir_to_llvm_context { LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; - LLVMValueRef shared_memory; uint64_t input_mask; uint64_t output_mask; uint8_t num_output_clips; @@ -386,23 +385,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) CONST_ADDR_SPACE); } -static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx, - int idx, - LLVMTypeRef type) -{ - LLVMValueRef offset; - LLVMValueRef ptr; - int addr_space; - - offset = LLVMConstInt(ctx->i32, idx * 16, false); - - ptr = ctx->shared_memory; - ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, ""); - addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); - ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), ""); - return ptr; -} - static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) { if (t == ctx->f16 || t == ctx->i16) @@ -2986,7 +2968,8 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, values[chan] = ctx->inputs[idx + chan + const_index * 4]; } break; - case nir_var_local: { + case nir_var_local: + case nir_var_shared: { LLVMValueRef address = build_gep_for_deref(ctx, instr->variables[0]); LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, ""); @@ -3016,23 +2999,6 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, } } break; - case nir_var_shared: { - LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); - LLVMValueRef derived_ptr; - - if (indir_index) - indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), ""); - - for (unsigned chan = 0; chan < ve; chan++) { - LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false); - if (indir_index) - index = 
LLVMBuildAdd(ctx->builder, index, indir_index, ""); - derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""); - - values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, ""); - } - break; - } default: unreachable("unhandle variable mode"); } @@ -3107,7 +3073,8 @@ visit_store_var(struct nir_to_llvm_context *ctx, } } break; - case nir_var_local: { + case nir_var_local: + case nir_var_shared: { int writemask = instr->const_index[0]; LLVMValueRef address = build_gep_for_deref(ctx, instr->variables[0]); @@ -3137,28 +3104,6 @@ visit_store_var(struct nir_to_llvm_context *ctx, } break; } - case nir_var_shared: { - LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); - - if (indir_index) - indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), ""); - - for (unsigned chan = 0; chan < 8; chan++) { - if (!(writemask & (1 << chan))) - continue; - LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false); - LLVMValueRef derived_ptr; - - if (indir_index) - index = LLVMBuildAdd(ctx->builder, index, indir_index, ""); - - value = llvm_extract_elem(ctx, src, chan); - derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""); - LLVMBuildStore(ctx->builder, - to_integer(&ctx->ac, value), derived_ptr); - } - break; - } default: break; } @@ -3637,9 +3582,8 @@ static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx, const nir_intrinsic_instr *instr) { LLVMValueRef ptr, result; - int idx = instr->variables[0]->var->data.driver_location; LLVMValueRef src = get_src(ctx, instr->src[0]); - ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); + ptr = build_gep_for_deref(ctx, instr->variables[0]); if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) { LLVMValueRef src1 = get_src(ctx, instr->src[1]); @@ -5114,6 +5058,20 @@ setup_locals(struct nir_to_llvm_context *ctx, } } +static void +setup_shared(struct nir_to_llvm_context *ctx, + struct nir_shader *nir) +{ + nir_foreach_variable(variable, &nir->shared) { + LLVMValueRef shared = 
+ LLVMAddGlobalInAddressSpace( + ctx->module, glsl_to_llvm_type(ctx, variable->type), + variable->name ? variable->name : "", + LOCAL_ADDR_SPACE); + _mesa_hash_table_insert(ctx->vars, variable, shared); + } +} + static LLVMValueRef emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, float hi) { @@ -5907,15 +5865,6 @@ handle_shader_outputs_post(struct nir_to_llvm_context *ctx) } } -static void -handle_shared_compute_var(struct nir_to_llvm_context *ctx, - struct nir_variable *variable, uint32_t *offset, int idx) -{ - unsigned size = glsl_count_attribute_slots(variable->type, false); - variable->data.driver_location = *offset; - *offset += size; -} - static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx) { LLVMPassManagerRef passmgr; @@ -6072,29 +6021,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, create_function(&ctx); - if (nir->stage == MESA_SHADER_COMPUTE) { - int num_shared = 0; - nir_foreach_variable(variable, &nir->shared) - num_shared++; - if (num_shared) { - int idx = 0; - uint32_t shared_size = 0; - LLVMValueRef var; - LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE); - nir_foreach_variable(variable, &nir->shared) { - handle_shared_compute_var(&ctx, variable, &shared_size, idx); - idx++; - } - - shared_size *= 16; - var = LLVMAddGlobalInAddressSpace(ctx.module, - LLVMArrayType(ctx.i8, shared_size), - "compute_lds", - LOCAL_ADDR_SPACE); - LLVMSetAlignment(var, 4); - ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, ""); - } - } else if (nir->stage == MESA_SHADER_GEOMETRY) { + if (nir->stage == MESA_SHADER_GEOMETRY) { ctx.gs_next_vertex = ac_build_alloca(&ctx, ctx.i32, "gs_next_vertex"); ctx.gs_max_out_vertices = nir->info.gs.vertices_out; @@ -6127,6 +6054,9 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, setup_locals(&ctx, func); + if (nir->stage == MESA_SHADER_COMPUTE) + setup_shared(&ctx, nir); + visit_cf_list(&ctx, &func->impl->body); phi_post_pass(&ctx); -- 
2.9.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev