From: Marek Olšák <marek.ol...@amd.com> 26011 shaders in 14651 tests Totals: SGPRS: 1152636 -> 1146340 (-0.55 %) VGPRS: 728198 -> 727371 (-0.11 %) Spilled SGPRs: 3776 -> 2218 (-41.26 %) Spilled VGPRs: 369 -> 369 (0.00 %) Scratch VGPRs: 1344 -> 1344 (0.00 %) dwords per thread Code Size: 35835152 -> 35841268 (0.02 %) bytes LDS: 767 -> 767 (0.00 %) blocks Max Waves: 222372 -> 222559 (0.08 %) Wait states: 0 -> 0 (0.00 %) --- src/gallium/drivers/radeonsi/si_shader.c | 38 ++++++++++++-------------------- 1 file changed, 14 insertions(+), 24 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c96c52e..faa5363 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -99,21 +99,20 @@ struct si_shader_context int param_tess_offchip; LLVMTargetMachineRef tm; unsigned invariant_load_md_kind; unsigned range_md_kind; unsigned uniform_md_kind; LLVMValueRef empty_md; /* Preloaded descriptors. */ - LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS]; LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring[4]; LLVMValueRef lds; LLVMValueRef gs_next_vertex[4]; LLVMValueRef return_value; LLVMTypeRef voidt; LLVMTypeRef i1; LLVMTypeRef i8; @@ -1842,20 +1841,29 @@ static void declare_compute_memory(struct radeon_llvm_context *radeon_bld, var = LLVMAddGlobalInAddressSpace(gallivm->module, LLVMArrayType(ctx->i8, sel->local_size), "compute_lds", LOCAL_ADDR_SPACE); LLVMSetAlignment(var, 4); ctx->shared_memory = LLVMBuildBitCast(gallivm->builder, var, i8p, ""); } +static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i) +{ + LLVMValueRef list_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_CONST_BUFFERS); + + return build_indexed_load_const(ctx, list_ptr, + LLVMConstInt(ctx->i32, i, 0)); +} + static LLVMValueRef fetch_constant( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { struct si_shader_context *ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; const struct tgsi_ind_register *ireg = &reg->Indirect; unsigned buf, idx; @@ -1869,45 +1877,46 @@ static LLVMValueRef fetch_constant( for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) values[chan] = fetch_constant(bld_base, reg, type, chan); return lp_build_gather_values(bld_base->base.gallivm, values, 4); } buf = reg->Register.Dimension ? 
reg->Dimension.Index : 0; idx = reg->Register.Index * 4 + swizzle; if (!reg->Register.Indirect && !reg->Dimension.Indirect) { - LLVMValueRef c0, c1; + LLVMValueRef c0, c1, desc; - c0 = buffer_load_const(ctx, ctx->const_buffers[buf], + desc = load_const_buffer_desc(ctx, buf); + c0 = buffer_load_const(ctx, desc, LLVMConstInt(ctx->i32, idx * 4, 0)); if (!tgsi_type_is_64bit(type)) return bitcast(bld_base, type, c0); else { - c1 = buffer_load_const(ctx, ctx->const_buffers[buf], + c1 = buffer_load_const(ctx, desc, LLVMConstInt(ctx->i32, (idx + 1) * 4, 0)); return radeon_llvm_emit_fetch_64bit(bld_base, type, c0, c1); } } if (reg->Register.Dimension && reg->Dimension.Indirect) { LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS); LLVMValueRef index; index = get_bounded_indirect_index(ctx, &reg->DimIndirect, reg->Dimension.Index, SI_NUM_CONST_BUFFERS); bufp = build_indexed_load_const(ctx, ptr, index); } else - bufp = ctx->const_buffers[buf]; + bufp = load_const_buffer_desc(ctx, buf); addr = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle]; addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg"); addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16); addr = lp_build_add(&bld_base->uint_bld, addr, lp_build_const_int32(base->gallivm, idx * 4)); result = buffer_load_const(ctx, bufp, addr); if (!tgsi_type_is_64bit(type)) @@ -5869,38 +5878,20 @@ static void create_function(struct si_shader_context *ctx) LLVMArrayType(ctx->i32, 64), "ddxy_lds", LOCAL_ADDR_SPACE); if ((ctx->type == PIPE_SHADER_VERTEX && shader->key.vs.as_ls) || ctx->type == PIPE_SHADER_TESS_CTRL || ctx->type == PIPE_SHADER_TESS_EVAL) declare_tess_lds(ctx); } -static void preload_constant_buffers(struct si_shader_context *ctx) -{ - struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base; - struct gallivm_state *gallivm = bld_base->base.gallivm; - const struct tgsi_shader_info *info = bld_base->info; - unsigned buf; - LLVMValueRef ptr = 
LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS); - - for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) { - if (info->const_file_max[buf] == -1) - continue; - - /* Load the resource descriptor */ - ctx->const_buffers[buf] = - build_indexed_load_const(ctx, ptr, lp_build_const_int32(gallivm, buf)); - } -} - /** * Load ESGS and GSVS ring buffer resource descriptors and save the variables * for later use. */ static void preload_ring_buffers(struct si_shader_context *ctx) { struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm; LLVMValueRef buf_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, @@ -6728,21 +6719,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, case PIPE_SHADER_COMPUTE: ctx.radeon_bld.declare_memory_region = declare_compute_memory; break; default: assert(!"Unsupported shader type"); return -1; } create_meta_data(&ctx); create_function(&ctx); - preload_constant_buffers(&ctx); preload_ring_buffers(&ctx); if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT && shader->key.ps.prolog.poly_stipple) { LLVMValueRef list = LLVMGetParam(ctx.radeon_bld.main_fn, SI_PARAM_RW_BUFFERS); si_llvm_emit_polygon_stipple(&ctx, list, SI_PARAM_POS_FIXED_PT); } -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev