From: Marek Olšák <marek.ol...@amd.com> It makes performance worse by a very small (hard to measure) amount. We've done extensive profiling of this feature internally.
Cc: 17.1 17.2 <mesa-sta...@lists.freedesktop.org> --- src/gallium/drivers/radeon/r600_pipe_common.c | 1 + src/gallium/drivers/radeon/r600_pipe_common.h | 4 ++-- src/gallium/drivers/radeonsi/si_pipe.c | 24 ++++++++++++++++++------ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 0038c9a..cb4b7a4 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -768,20 +768,21 @@ static const struct debug_named_value common_debug_options[] = { { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." }, { "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." }, { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." }, { "nowc", DBG_NO_WC, "Disable GTT write combining" }, { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." }, { "nodcc", DBG_NO_DCC, "Disable DCC." }, { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." }, { "norbplus", DBG_NO_RB_PLUS, "Disable RB+." }, { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." }, { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" }, + { "ce", DBG_CE, "Force enable the constant engine" }, { "noce", DBG_NO_CE, "Disable the constant engine"}, { "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" }, { "nodccfb", DBG_NO_DCC_FB, "Disable separate DCC on the main framebuffer" }, DEBUG_NAMED_VALUE_END /* must be last */ }; static const char* r600_get_vendor(struct pipe_screen* pscreen) { return "X.Org"; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 67b3c87..14bc63e 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -58,26 +58,26 @@ #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) /* Pipeline & streamout query controls. */ #define R600_CONTEXT_START_PIPELINE_STATS (1u << 1) #define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2) #define R600_CONTEXT_PRIVATE_FLAG (1u << 3) /* special primitive types */ #define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX /* Debug flags. */ -/* logging */ +/* logging and features */ #define DBG_TEX (1 << 0) #define DBG_NIR (1 << 1) #define DBG_COMPUTE (1 << 2) #define DBG_VM (1 << 3) -/* gap - reuse */ +#define DBG_CE (1 << 4) /* shader logging */ #define DBG_FS (1 << 5) #define DBG_VS (1 << 6) #define DBG_GS (1 << 7) #define DBG_PS (1 << 8) #define DBG_CS (1 << 9) #define DBG_TCS (1 << 10) #define DBG_TES (1 << 11) #define DBG_NO_IR (1 << 12) #define DBG_NO_TGSI (1 << 13) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 2c65cc8..cac1d01 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -194,26 +194,38 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->b.b.create_video_codec = si_uvd_create_decoder; sctx->b.b.create_video_buffer = si_video_buffer_create; } else { sctx->b.b.create_video_codec = vl_create_decoder; sctx->b.b.create_video_buffer = vl_video_buffer_create; } sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush, sctx); - /* SI + AMDGPU + CE = GPU hang */ - if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib && - sscreen->b.chip_class != SI && - /* These can't use CE due to a power gating bug in the kernel. */ - sscreen->b.family != CHIP_CARRIZO && - sscreen->b.family != CHIP_STONEY) { + bool enable_ce = sscreen->b.chip_class != SI && /* SI hangs */ + /* These can't use CE due to a power gating bug in the kernel. */ + sscreen->b.family != CHIP_CARRIZO && + sscreen->b.family != CHIP_STONEY; + + /* CE is currently disabled by default, because it makes s_load latency + * worse, because CE IB doesn't run in lockstep with DE. + * Remove this line after that performance issue has been resolved. + */ + enable_ce = false; + + /* Apply CE overrides. */ + if (sscreen->b.debug_flags & DBG_NO_CE) + enable_ce = false; + else if (sscreen->b.debug_flags & DBG_CE) + enable_ce = true; + + if (ws->cs_add_const_ib && enable_ce) { sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs); if (!sctx->ce_ib) goto fail; if (ws->cs_add_const_preamble_ib) { sctx->ce_preamble_ib = ws->cs_add_const_preamble_ib(sctx->b.gfx.cs); if (!sctx->ce_preamble_ib) goto fail; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev