From: Connor Abbott <cwabbo...@gmail.com>

Similar to what was done for local variables, translate shared variables
directly from NIR to LLVM instead of lowering them to an array and then
back to a variable. This should fix indirect dereferences, pack shared
variables more tightly, and make LLVM's alias analysis more precise.
---
 src/amd/common/ac_nir_to_llvm.c | 116 ++++++++--------------------------------
 1 file changed, 23 insertions(+), 93 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index f42d214..743cc1d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -155,7 +155,6 @@ struct nir_to_llvm_context {
        LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
        LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
 
-       LLVMValueRef shared_memory;
        uint64_t input_mask;
        uint64_t output_mask;
        uint8_t num_output_clips;
@@ -386,23 +385,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int 
num_elements)
                               CONST_ADDR_SPACE);
 }
 
-static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
-                                         int idx,
-                                         LLVMTypeRef type)
-{
-       LLVMValueRef offset;
-       LLVMValueRef ptr;
-       int addr_space;
-
-       offset = LLVMConstInt(ctx->i32, idx * 16, false);
-
-       ptr = ctx->shared_memory;
-       ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
-       addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
-       ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, 
addr_space), "");
-       return ptr;
-}
-
 static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, 
LLVMTypeRef t)
 {
        if (t == ctx->f16 || t == ctx->i16)
@@ -2986,7 +2968,8 @@ static LLVMValueRef visit_load_var(struct 
nir_to_llvm_context *ctx,
                                values[chan] = ctx->inputs[idx + chan + 
const_index * 4];
                }
                break;
-       case nir_var_local: {
+       case nir_var_local:
+       case nir_var_shared: {
                LLVMValueRef address = build_gep_for_deref(ctx,
                                                           instr->variables[0]);
                LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, "");
@@ -3016,23 +2999,6 @@ static LLVMValueRef visit_load_var(struct 
nir_to_llvm_context *ctx,
                        }
                }
                break;
-       case nir_var_shared: {
-               LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
-               LLVMValueRef derived_ptr;
-
-               if (indir_index)
-                       indir_index = LLVMBuildMul(ctx->builder, indir_index, 
LLVMConstInt(ctx->i32, 4, false), "");
-
-               for (unsigned chan = 0; chan < ve; chan++) {
-                       LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 
false);
-                       if (indir_index)
-                               index = LLVMBuildAdd(ctx->builder, index, 
indir_index, "");
-                       derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 
1, "");
-
-                       values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, 
"");
-               }
-               break;
-       }
        default:
                unreachable("unhandle variable mode");
        }
@@ -3107,7 +3073,8 @@ visit_store_var(struct nir_to_llvm_context *ctx,
                        }
                }
                break;
-       case nir_var_local: {
+       case nir_var_local:
+       case nir_var_shared: {
                int writemask = instr->const_index[0];
                LLVMValueRef address = build_gep_for_deref(ctx,
                                                           instr->variables[0]);
@@ -3137,28 +3104,6 @@ visit_store_var(struct nir_to_llvm_context *ctx,
                }
                break;
        }
-       case nir_var_shared: {
-               LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
-
-               if (indir_index)
-                       indir_index = LLVMBuildMul(ctx->builder, indir_index, 
LLVMConstInt(ctx->i32, 4, false), "");
-
-               for (unsigned chan = 0; chan < 8; chan++) {
-                       if (!(writemask & (1 << chan)))
-                               continue;
-                       LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 
false);
-                       LLVMValueRef derived_ptr;
-
-                       if (indir_index)
-                               index = LLVMBuildAdd(ctx->builder, index, 
indir_index, "");
-
-                       value = llvm_extract_elem(ctx, src, chan);
-                       derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 
1, "");
-                       LLVMBuildStore(ctx->builder,
-                                      to_integer(&ctx->ac, value), 
derived_ptr);
-               }
-               break;
-       }
        default:
                break;
        }
@@ -3637,9 +3582,8 @@ static LLVMValueRef visit_var_atomic(struct 
nir_to_llvm_context *ctx,
                                     const nir_intrinsic_instr *instr)
 {
        LLVMValueRef ptr, result;
-       int idx = instr->variables[0]->var->data.driver_location;
        LLVMValueRef src = get_src(ctx, instr->src[0]);
-       ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
+       ptr = build_gep_for_deref(ctx, instr->variables[0]);
 
        if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
                LLVMValueRef src1 = get_src(ctx, instr->src[1]);
@@ -5114,6 +5058,20 @@ setup_locals(struct nir_to_llvm_context *ctx,
        }
 }
 
+static void
+setup_shared(struct nir_to_llvm_context *ctx,
+            struct nir_shader *nir)
+{
+       nir_foreach_variable(variable, &nir->shared) {
+               LLVMValueRef shared =
+                       LLVMAddGlobalInAddressSpace(
+                          ctx->module, glsl_to_llvm_type(ctx, variable->type),
+                          variable->name ? variable->name : "",
+                          LOCAL_ADDR_SPACE);
+               _mesa_hash_table_insert(ctx->vars, variable, shared);
+       }
+}
+
 static LLVMValueRef
 emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, 
float hi)
 {
@@ -5907,15 +5865,6 @@ handle_shader_outputs_post(struct nir_to_llvm_context 
*ctx)
        }
 }
 
-static void
-handle_shared_compute_var(struct nir_to_llvm_context *ctx,
-                         struct nir_variable *variable, uint32_t *offset, int 
idx)
-{
-       unsigned size = glsl_count_attribute_slots(variable->type, false);
-       variable->data.driver_location = *offset;
-       *offset += size;
-}
-
 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
 {
        LLVMPassManagerRef passmgr;
@@ -6072,29 +6021,7 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 
        create_function(&ctx);
 
-       if (nir->stage == MESA_SHADER_COMPUTE) {
-               int num_shared = 0;
-               nir_foreach_variable(variable, &nir->shared)
-                       num_shared++;
-               if (num_shared) {
-                       int idx = 0;
-                       uint32_t shared_size = 0;
-                       LLVMValueRef var;
-                       LLVMTypeRef i8p = LLVMPointerType(ctx.i8, 
LOCAL_ADDR_SPACE);
-                       nir_foreach_variable(variable, &nir->shared) {
-                               handle_shared_compute_var(&ctx, variable, 
&shared_size, idx);
-                               idx++;
-                       }
-
-                       shared_size *= 16;
-                       var = LLVMAddGlobalInAddressSpace(ctx.module,
-                                                         LLVMArrayType(ctx.i8, 
shared_size),
-                                                         "compute_lds",
-                                                         LOCAL_ADDR_SPACE);
-                       LLVMSetAlignment(var, 4);
-                       ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, 
i8p, "");
-               }
-       } else if (nir->stage == MESA_SHADER_GEOMETRY) {
+       if (nir->stage == MESA_SHADER_GEOMETRY) {
                ctx.gs_next_vertex = ac_build_alloca(&ctx, ctx.i32, 
"gs_next_vertex");
 
                ctx.gs_max_out_vertices = nir->info.gs.vertices_out;
@@ -6127,6 +6054,9 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 
        setup_locals(&ctx, func);
 
+       if (nir->stage == MESA_SHADER_COMPUTE)
+               setup_shared(&ctx, nir);
+
        visit_cf_list(&ctx, &func->impl->body);
        phi_post_pass(&ctx);
 
-- 
2.9.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to