On 10.09.2016 00:40, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

LLVM can CSE the loads, thus we can always re-load constants before each
use. The decrease in SGPR spilling is huge.

The best improvements are the dumbest ones.

Indeed :)

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


26011 shaders in 14651 tests
Totals:
SGPRS: 1453346 -> 1251920 (-13.86 %)
VGPRS: 742576 -> 728421 (-1.91 %)
Spilled SGPRs: 52298 -> 16644 (-68.17 %)
Spilled VGPRs: 397 -> 369 (-7.05 %)
Scratch VGPRs: 1372 -> 1344 (-2.04 %) dwords per thread
Code Size: 36136488 -> 36001064 (-0.37 %) bytes
LDS: 767 -> 767 (0.00 %) blocks
Max Waves: 219315 -> 222221 (1.33 %)
---
 src/gallium/drivers/radeonsi/si_shader.c | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0b7de18..08e3cee 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1874,26 +1874,33 @@ static LLVMValueRef fetch_constant(
                for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
                        values[chan] = fetch_constant(bld_base, reg, type, chan);

                return lp_build_gather_values(bld_base->base.gallivm, values, 4);
        }

        buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
        idx = reg->Register.Index * 4 + swizzle;

        if (!reg->Register.Indirect && !reg->Dimension.Indirect) {
+               LLVMValueRef c0, c1;
+
+               c0 = buffer_load_const(ctx, ctx->const_buffers[buf],
+                                      LLVMConstInt(ctx->i32, idx * 4, 0));
+
                if (!tgsi_type_is_64bit(type))
-                       return bitcast(bld_base, type, ctx->constants[buf][idx]);
+                       return bitcast(bld_base, type, c0);
                else {
+                       c1 = buffer_load_const(ctx, ctx->const_buffers[buf],
+                                              LLVMConstInt(ctx->i32,
+                                                           (idx + 1) * 4, 0));
                        return radeon_llvm_emit_fetch_64bit(bld_base, type,
-                                                           ctx->constants[buf][idx],
-                                                           ctx->constants[buf][idx + 1]);
+                                                           c0, c1);
                }
        }

        if (reg->Register.Dimension && reg->Dimension.Indirect) {
                LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
                LLVMValueRef index;
                index = get_bounded_indirect_index(ctx, &reg->DimIndirect,
                                                   reg->Dimension.Index,
                                                   SI_NUM_CONST_BUFFERS);
                bufp = build_indexed_load_const(ctx, ptr, index);
@@ -5789,39 +5796,26 @@ static void create_function(struct si_shader_context *ctx)

 static void preload_constants(struct si_shader_context *ctx)
 {
        struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        const struct tgsi_shader_info *info = bld_base->info;
        unsigned buf;
        LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);

        for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
-               unsigned i, num_const = info->const_file_max[buf] + 1;
-
-               if (num_const == 0)
+               if (info->const_file_max[buf] == -1)
                        continue;

-               /* Allocate space for the constant values */
-               ctx->constants[buf] = CALLOC(num_const * 4, sizeof(LLVMValueRef));
-
                /* Load the resource descriptor */
                ctx->const_buffers[buf] =
                        build_indexed_load_const(ctx, ptr, lp_build_const_int32(gallivm, buf));
-
-               /* Load the constants, we rely on the code sinking to do the rest */
-               for (i = 0; i < num_const * 4; ++i) {
-                       ctx->constants[buf][i] =
-                               buffer_load_const(ctx,
-                                       ctx->const_buffers[buf],
-                                       lp_build_const_int32(gallivm, i * 4));
-               }
        }
 }

 static void preload_shader_buffers(struct si_shader_context *ctx)
 {
        struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
        LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS);
        int buf, maxbuf;

        maxbuf = MIN2(ctx->shader->selector->info.file_max[TGSI_FILE_BUFFER],
@@ -6898,22 +6892,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                ctx.shader = shader->gs_copy_shader;
                if ((r = si_generate_gs_copy_shader(sscreen, &ctx,
                                                    shader, debug))) {
                        free(shader->gs_copy_shader);
                        shader->gs_copy_shader = NULL;
                        goto out;
                }
        }

 out:
-       for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++)
-               FREE(ctx.constants[i]);
        return r;
 }

 /**
  * Create, compile and return a shader part (prolog or epilog).
  *
  * \param sscreen      screen
  * \param list         list of shader parts of the same category
  * \param key          shader part key
  * \param tm           LLVM target machine

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to