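From: Marek Olšák <marek.ol...@amd.com> LLVM 5.0 doesn't have working VGPR indexing on GFX9, so avoid relying on it there. For inputs and outputs, indirect indexing is lowered by the GLSL compiler (except for tessellation-stage inputs and TCS outputs, which are accessed directly in LDS or offchip memory). For temporaries, use alloca and disable the "promote-alloca" pass.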
In the future, we could switch all codepaths to alloca permanently and just rely on the "promote-alloca" pass. --- src/gallium/drivers/radeonsi/si_pipe.c | 25 ++++++++++++++++------ src/gallium/drivers/radeonsi/si_pipe.h | 1 + .../drivers/radeonsi/si_shader_tgsi_setup.c | 3 +-- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index afb2bcb..8a4bc41 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -134,22 +134,23 @@ static void si_emit_string_marker(struct pipe_context *ctx, dd_parse_apitrace_marker(string, len, &sctx->apitrace_call_number); } static LLVMTargetMachineRef si_create_llvm_target_machine(struct si_screen *sscreen) { const char *triple = "amdgcn--"; char features[256]; snprintf(features, sizeof(features), - "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s", + "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s", sscreen->b.chip_class >= GFX9 ? ",+xnack" : ",-xnack", + sscreen->llvm_has_working_vgpr_indexing ? "" : ",-promote-alloca", sscreen->b.debug_flags & DBG_SI_SCHED ? ",+si-scheduler" : ""); return LLVMCreateTargetMachine(ac_get_llvm_target(triple), triple, r600_get_llvm_processor_name(sscreen->b.family), features, LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); } @@ -750,34 +751,41 @@ static int si_get_shader_param(struct pipe_screen* pscreen, case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: return 3; /* Supported boolean features. 
*/ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: - case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: case PIPE_SHADER_CAP_INTEGERS: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: return 1; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: - /* TODO: Indirection of geometry shader input dimension is not - * handled yet - */ - return shader != PIPE_SHADER_GEOMETRY; + /* TODO: Indirect indexing of GS inputs is unimplemented. */ + return shader != PIPE_SHADER_GEOMETRY && + (sscreen->llvm_has_working_vgpr_indexing || + /* TCS and TES load inputs directly from LDS or + * offchip memory, so indirect indexing is trivial. */ + shader == PIPE_SHADER_TESS_CTRL || + shader == PIPE_SHADER_TESS_EVAL); + + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + return sscreen->llvm_has_working_vgpr_indexing || + /* TCS stores outputs directly to memory. */ + shader == PIPE_SHADER_TESS_CTRL; /* Unsupported boolean features. */ case PIPE_SHADER_CAP_SUBROUTINES: case PIPE_SHADER_CAP_SUPPORTED_IRS: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: return 0; } return 0; } @@ -999,20 +1007,25 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->b.info.me_fw_version >= 173) || (sscreen->b.chip_class == SI && sscreen->b.info.pfp_fw_version >= 121 && sscreen->b.info.me_fw_version >= 87); sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI; sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 && sscreen->b.family <= CHIP_POLARIS12) || sscreen->b.family == CHIP_VEGA10 || sscreen->b.family == CHIP_RAVEN; + /* While it would be nice not to have this flag, we are constrained + * by the reality that LLVM 5.0 doesn't have working VGPR indexing + * on GFX9. 
+ */ + sscreen->llvm_has_working_vgpr_indexing = sscreen->b.chip_class <= VI; sscreen->b.has_cp_dma = true; sscreen->b.has_streamout = true; /* Some chips have RB+ registers, but don't support RB+. Those must * always disable it. */ if (sscreen->b.family == CHIP_STONEY || sscreen->b.chip_class >= GFX9) { sscreen->b.has_rbplus = true; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index bd724e8..c028aba 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -76,20 +76,21 @@ struct hash_table; struct u_suballocator; struct si_screen { struct r600_common_screen b; unsigned gs_table_depth; unsigned tess_offchip_block_dw_size; bool has_distributed_tess; bool has_draw_indirect_multi; bool has_ds_bpermute; bool has_msaa_sample_loc_bug; + bool llvm_has_working_vgpr_indexing; /* Whether shaders are monolithic (1-part) or separate (3-part). */ bool use_monolithic_shaders; bool record_llvm_ir; mtx_t shader_parts_mutex; struct si_shader_part *vs_prologs; struct si_shader_part *tcs_epilogs; struct si_shader_part *gs_prologs; struct si_shader_part *ps_prologs; diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index b37d4b2..9c4a234 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -748,22 +748,21 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base, * LLVM will store in a register, so theoretically an * array with up to 4 * 16 = 64 elements could be * handled this way, but whether that's a good idea * depends on VGPR register pressure elsewhere. * * FIXME: We shouldn't need to have the non-alloca * code path for arrays. LLVM should be smart enough to * promote allocas into registers when profitable. */ if (array_size > 16 || - /* TODO: VGPR indexing is buggy on GFX9. 
*/ - ctx->screen->b.chip_class == GFX9) { + !ctx->screen->llvm_has_working_vgpr_indexing) { array_alloca = LLVMBuildAlloca(builder, LLVMArrayType(ctx->f32, array_size), "array"); ctx->temp_array_allocas[id] = array_alloca; } } if (!ctx->temps_count) { ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1; ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef)); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev