Module: Mesa
Branch: master
Commit: 74470baebbdacc8fd31c9912eb8c00c0cd102903
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=74470baebbdacc8fd31c9912eb8c00c0cd102903

Author: Connor Abbott <[email protected]>
Date:   Wed Jun 12 18:58:13 2019 +0200

ac/nir: Lower large indirect variables to scratch

results from radeonsi NIR:

Totals from affected shaders:
SGPRS: 704 -> 464 (-34.09 %)
VGPRS: 2056 -> 672 (-67.32 %)
Spilled SGPRs: 24 -> 0 (-100.00 %)
Spilled VGPRs: 28406 -> 0 (-100.00 %)
Private memory VGPRs: 0 -> 3182 (0.00 %)
Scratch size: 1064 -> 3228 (203.38 %) dwords per thread
Code Size: 935260 -> 40180 (-95.70 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 28 -> 70 (150.00 %)
Wait states: 0 -> 0 (0.00 %)

results from radv:

Totals from affected shaders:
SGPRS: 80 -> 48 (-40.00 %)
VGPRS: 204 -> 108 (-47.06 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 256 (0.00 %) dwords per thread
Code Size: 15792 -> 9504 (-39.82 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 1 -> 2 (100.00 %)
Wait states: 0 -> 0 (0.00 %)

Reviewed-by: Bas Nieuwenhuizen <[email protected]>

---

 src/amd/common/ac_nir_to_llvm.c | 54 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 53b93f7e1d3..5e25e838f8f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -42,6 +42,8 @@ struct ac_nir_context {
 
        LLVMValueRef *ssa_defs;
 
+       LLVMValueRef scratch;
+
        struct hash_table *defs;
        struct hash_table *phis;
        struct hash_table *vars;
@@ -3573,6 +3575,36 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
        case nir_intrinsic_mbcnt_amd:
                result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
                break;
+       case nir_intrinsic_load_scratch: {
+               LLVMValueRef offset = get_src(ctx, instr->src[0]);
+               LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
+                                                offset);
+               LLVMTypeRef comp_type =
+                       LLVMIntTypeInContext(ctx->ac.context, 
instr->dest.ssa.bit_size);
+               LLVMTypeRef vec_type =
+                       instr->dest.ssa.num_components == 1 ? comp_type :
+                       LLVMVectorType(comp_type, 
instr->dest.ssa.num_components);
+               unsigned addr_space = 
LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+               ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+                                      LLVMPointerType(vec_type, addr_space), 
"");
+               result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+               break;
+       }
+       case nir_intrinsic_store_scratch: {
+               LLVMValueRef offset = get_src(ctx, instr->src[1]);
+               LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
+                                                offset);
+               LLVMTypeRef comp_type =
+                       LLVMIntTypeInContext(ctx->ac.context, 
instr->src[0].ssa->bit_size);
+               LLVMTypeRef vec_type =
+                       instr->src[0].ssa->num_components == 1 ? comp_type :
+                       LLVMVectorType(comp_type, 
instr->src[0].ssa->num_components);
+               unsigned addr_space = 
LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+               ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+                                      LLVMPointerType(vec_type, addr_space), 
"");
+               LLVMBuildStore(ctx->ac.builder, get_src(ctx, instr->src[0]), 
ptr);
+               break;
+       }
        default:
                fprintf(stderr, "Unknown intrinsic: ");
                nir_print_instr(&instr->instr, stderr);
@@ -4474,6 +4506,18 @@ setup_locals(struct ac_nir_context *ctx,
 }
 
 static void
+setup_scratch(struct ac_nir_context *ctx,
+             struct nir_shader *shader)
+{
+       if (shader->scratch_size == 0)
+               return;
+
+       ctx->scratch = ac_build_alloca_undef(&ctx->ac,
+                                            LLVMArrayType(ctx->ac.i8, 
shader->scratch_size),
+                                            "scratch");
+}
+
+static void
 setup_shared(struct ac_nir_context *ctx,
             struct nir_shader *nir)
 {
@@ -4518,6 +4562,7 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct 
ac_shader_abi *abi,
        ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
 
        setup_locals(&ctx, func);
+       setup_scratch(&ctx, nir);
 
        if (gl_shader_stage_is_compute(nir->info.stage))
                setup_shared(&ctx, nir);
@@ -4539,6 +4584,15 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct 
ac_shader_abi *abi,
 void
 ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
 {
+       /* Lower large variables to scratch first so that we won't bloat the
+        * shader by generating large if ladders for them. We later lower
+        * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
+        */
+       NIR_PASS_V(nir, nir_lower_vars_to_scratch,
+                  nir_var_function_temp,
+                  256,
+                  glsl_get_natural_size_align_bytes);
+
        /* While it would be nice not to have this flag, we are constrained
         * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9.
         */

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to