Patches 3-4 look technically correct to me, so, for just using it for shared vars:
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen> On Thu, Jul 6, 2017 at 9:48 PM, Connor Abbott <conn...@valvesoftware.com> wrote: > From: Connor Abbott <cwabbo...@gmail.com> > > Similar to before, do the direct NIR->LLVM translation instead of > lowering to an array then back to a variable. This should fix indirect > dereferences, make shared variables more tightly packed, and make LLVM's > alias analysis more precise. > --- > src/amd/common/ac_nir_to_llvm.c | 116 > ++++++++-------------------------------- > 1 file changed, 23 insertions(+), 93 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index f42d214..743cc1d 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -155,7 +155,6 @@ struct nir_to_llvm_context { > LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; > LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; > > - LLVMValueRef shared_memory; > uint64_t input_mask; > uint64_t output_mask; > uint8_t num_output_clips; > @@ -386,23 +385,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, > int num_elements) > CONST_ADDR_SPACE); > } > > -static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx, > - int idx, > - LLVMTypeRef type) > -{ > - LLVMValueRef offset; > - LLVMValueRef ptr; > - int addr_space; > - > - offset = LLVMConstInt(ctx->i32, idx * 16, false); > - > - ptr = ctx->shared_memory; > - ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, ""); > - addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); > - ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, > addr_space), ""); > - return ptr; > -} > - > static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, > LLVMTypeRef t) > { > if (t == ctx->f16 || t == ctx->i16) > @@ -2986,7 +2968,8 @@ static LLVMValueRef visit_load_var(struct > nir_to_llvm_context *ctx, > values[chan] = ctx->inputs[idx + chan + > const_index * 4]; > } > break; > - case nir_var_local: { > + case 
nir_var_local: > + case nir_var_shared: { > LLVMValueRef address = build_gep_for_deref(ctx, > > instr->variables[0]); > LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, ""); > @@ -3016,23 +2999,6 @@ static LLVMValueRef visit_load_var(struct > nir_to_llvm_context *ctx, > } > } > break; > - case nir_var_shared: { > - LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); > - LLVMValueRef derived_ptr; > - > - if (indir_index) > - indir_index = LLVMBuildMul(ctx->builder, indir_index, > LLVMConstInt(ctx->i32, 4, false), ""); > - > - for (unsigned chan = 0; chan < ve; chan++) { > - LLVMValueRef index = LLVMConstInt(ctx->i32, chan, > false); > - if (indir_index) > - index = LLVMBuildAdd(ctx->builder, index, > indir_index, ""); > - derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, > 1, ""); > - > - values[chan] = LLVMBuildLoad(ctx->builder, > derived_ptr, ""); > - } > - break; > - } > default: > unreachable("unhandle variable mode"); > } > @@ -3107,7 +3073,8 @@ visit_store_var(struct nir_to_llvm_context *ctx, > } > } > break; > - case nir_var_local: { > + case nir_var_local: > + case nir_var_shared: { > int writemask = instr->const_index[0]; > LLVMValueRef address = build_gep_for_deref(ctx, > > instr->variables[0]); > @@ -3137,28 +3104,6 @@ visit_store_var(struct nir_to_llvm_context *ctx, > } > break; > } > - case nir_var_shared: { > - LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); > - > - if (indir_index) > - indir_index = LLVMBuildMul(ctx->builder, indir_index, > LLVMConstInt(ctx->i32, 4, false), ""); > - > - for (unsigned chan = 0; chan < 8; chan++) { > - if (!(writemask & (1 << chan))) > - continue; > - LLVMValueRef index = LLVMConstInt(ctx->i32, chan, > false); > - LLVMValueRef derived_ptr; > - > - if (indir_index) > - index = LLVMBuildAdd(ctx->builder, index, > indir_index, ""); > - > - value = llvm_extract_elem(ctx, src, chan); > - derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, > 1, ""); > - LLVMBuildStore(ctx->builder, > - 
to_integer(&ctx->ac, value), > derived_ptr); > - } > - break; > - } > default: > break; > } > @@ -3637,9 +3582,8 @@ static LLVMValueRef visit_var_atomic(struct > nir_to_llvm_context *ctx, > const nir_intrinsic_instr *instr) > { > LLVMValueRef ptr, result; > - int idx = instr->variables[0]->var->data.driver_location; > LLVMValueRef src = get_src(ctx, instr->src[0]); > - ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); > + ptr = build_gep_for_deref(ctx, instr->variables[0]); > > if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) { > LLVMValueRef src1 = get_src(ctx, instr->src[1]); > @@ -5114,6 +5058,20 @@ setup_locals(struct nir_to_llvm_context *ctx, > } > } > > +static void > +setup_shared(struct nir_to_llvm_context *ctx, > + struct nir_shader *nir) > +{ > + nir_foreach_variable(variable, &nir->shared) { > + LLVMValueRef shared = > + LLVMAddGlobalInAddressSpace( > + ctx->module, glsl_to_llvm_type(ctx, > variable->type), > + variable->name ? variable->name : "", > + LOCAL_ADDR_SPACE); > + _mesa_hash_table_insert(ctx->vars, variable, shared); > + } > +} > + > static LLVMValueRef > emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, > float hi) > { > @@ -5907,15 +5865,6 @@ handle_shader_outputs_post(struct nir_to_llvm_context > *ctx) > } > } > > -static void > -handle_shared_compute_var(struct nir_to_llvm_context *ctx, > - struct nir_variable *variable, uint32_t *offset, > int idx) > -{ > - unsigned size = glsl_count_attribute_slots(variable->type, false); > - variable->data.driver_location = *offset; > - *offset += size; > -} > - > static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx) > { > LLVMPassManagerRef passmgr; > @@ -6072,29 +6021,7 @@ LLVMModuleRef > ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, > > create_function(&ctx); > > - if (nir->stage == MESA_SHADER_COMPUTE) { > - int num_shared = 0; > - nir_foreach_variable(variable, &nir->shared) > - num_shared++; > - if (num_shared) { > - int idx = 0; > - uint32_t 
shared_size = 0; > - LLVMValueRef var; > - LLVMTypeRef i8p = LLVMPointerType(ctx.i8, > LOCAL_ADDR_SPACE); > - nir_foreach_variable(variable, &nir->shared) { > - handle_shared_compute_var(&ctx, variable, > &shared_size, idx); > - idx++; > - } > - > - shared_size *= 16; > - var = LLVMAddGlobalInAddressSpace(ctx.module, > - > LLVMArrayType(ctx.i8, shared_size), > - "compute_lds", > - LOCAL_ADDR_SPACE); > - LLVMSetAlignment(var, 4); > - ctx.shared_memory = LLVMBuildBitCast(ctx.builder, > var, i8p, ""); > - } > - } else if (nir->stage == MESA_SHADER_GEOMETRY) { > + if (nir->stage == MESA_SHADER_GEOMETRY) { > ctx.gs_next_vertex = ac_build_alloca(&ctx, ctx.i32, > "gs_next_vertex"); > > ctx.gs_max_out_vertices = nir->info.gs.vertices_out; > @@ -6127,6 +6054,9 @@ LLVMModuleRef > ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, > > setup_locals(&ctx, func); > > + if (nir->stage == MESA_SHADER_COMPUTE) > + setup_shared(&ctx, nir); > + > visit_cf_list(&ctx, &func->impl->body); > phi_post_pass(&ctx); > > -- > 2.9.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev