From: Nicolai Hähnle <nicolai.haeh...@amd.com> We'll have to extend the VGT_SHADER_STAGES key at some point, and representing it as a bitfield union makes it easier to get the right index into the table of precomputed values without excessive branching. --- src/gallium/drivers/radeonsi/si_pipe.h | 23 ++++++- .../drivers/radeonsi/si_state_shaders.c | 65 +++++++++++-------- 2 files changed, 60 insertions(+), 28 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index d32feab52c2..368cb4e473d 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -760,20 +760,41 @@ union si_vgt_param_key { unsigned count_from_stream_output:1; unsigned primitive_restart:1; unsigned multi_instances_smaller_than_primgroup:1; unsigned uses_instancing:1; unsigned prim:4; #endif } u; uint32_t index; }; +#define SI_NUM_VGT_STAGES_KEY_BITS 2 +#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS) + +/* The VGT_SHADER_STAGES key used to index the table of precomputed values. + * Some fields are set by state-change calls, most are set by draw_vbo. + */ +union si_vgt_stages_key { + struct { +#ifdef PIPE_ARCH_LITTLE_ENDIAN + unsigned tess:1; + unsigned gs:1; + unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS; +#else /* PIPE_ARCH_BIG_ENDIAN */ + unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS; + unsigned gs:1; + unsigned tess:1; +#endif + } u; + uint32_t index; +}; + struct si_texture_handle { unsigned desc_slot; bool desc_dirty; struct pipe_sampler_view *view; struct si_sampler_state sstate; }; struct si_image_handle { @@ -914,21 +935,21 @@ struct si_context { struct si_streamout streamout; struct si_viewports viewports; unsigned num_window_rectangles; bool window_rectangles_include; struct pipe_scissor_state window_rectangles[4]; /* Precomputed states. 
*/ struct si_pm4_state *init_config; struct si_pm4_state *init_config_gs_rings; bool init_config_has_vgt_flush; - struct si_pm4_state *vgt_shader_config[4]; + struct si_pm4_state *vgt_shader_config[SI_NUM_VGT_STAGES_STATES]; /* shaders */ struct si_shader_ctx_state ps_shader; struct si_shader_ctx_state gs_shader; struct si_shader_ctx_state vs_shader; struct si_shader_ctx_state tcs_shader; struct si_shader_ctx_state tes_shader; struct si_shader_ctx_state cs_prim_discard_state; struct si_cs_shader_state cs_shader_state; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index fab2e255742..0e3a1c3a776 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -3311,74 +3311,83 @@ static void si_init_tess_factor_ring(struct si_context *sctx) } /* Flush the context to re-emit the init_config state. * This is done only once in a lifetime of a context. */ si_pm4_upload_indirect_buffer(sctx, sctx->init_config); sctx->initial_gfx_cs_size = 0; /* force flush */ si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); } -static void si_update_vgt_shader_config(struct si_context *sctx) +static struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen, + union si_vgt_stages_key key) { - /* Calculate the index of the config. 
- * 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */ - unsigned index = 2*!!sctx->tes_shader.cso + !!sctx->gs_shader.cso; - struct si_pm4_state **pm4 = &sctx->vgt_shader_config[index]; + struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); + uint32_t stages = 0; - if (!*pm4) { - uint32_t stages = 0; + if (key.u.tess) { + stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | + S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1); - *pm4 = CALLOC_STRUCT(si_pm4_state); + if (key.u.gs) + stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | + S_028B54_GS_EN(1); + else + stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS); + } else if (key.u.gs) { + stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | + S_028B54_GS_EN(1); + } - if (sctx->tes_shader.cso) { - stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | - S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1); + if (key.u.gs) + stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); - if (sctx->gs_shader.cso) - stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | - S_028B54_GS_EN(1) | - S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); - else - stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS); - } else if (sctx->gs_shader.cso) { - stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | - S_028B54_GS_EN(1) | - S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); - } + if (screen->info.chip_class >= GFX9) + stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2); - if (sctx->chip_class >= GFX9) - stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2); + si_pm4_set_reg(pm4, R_028B54_VGT_SHADER_STAGES_EN, stages); + return pm4; +} - si_pm4_set_reg(*pm4, R_028B54_VGT_SHADER_STAGES_EN, stages); - } +static void si_update_vgt_shader_config(struct si_context *sctx, + union si_vgt_stages_key key) +{ + struct si_pm4_state **pm4 = &sctx->vgt_shader_config[key.index]; + + if (unlikely(!*pm4)) + *pm4 = si_build_vgt_shader_config(sctx->screen, key); si_pm4_bind_state(sctx, vgt_shader_config, *pm4); } bool si_update_shaders(struct si_context *sctx) { struct pipe_context *ctx = (struct pipe_context*)sctx; struct 
si_compiler_ctx_state compiler_state; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct si_shader *old_vs = si_get_vs_state(sctx); bool old_clip_disable = old_vs ? old_vs->key.opt.clip_disable : false; struct si_shader *old_ps = sctx->ps_shader.current; + union si_vgt_stages_key key; unsigned old_spi_shader_col_format = old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0; int r; compiler_state.compiler = &sctx->compiler; compiler_state.debug = sctx->debug; compiler_state.is_debug_context = sctx->is_debug; + key.index = 0; + /* Update stages before GS. */ if (sctx->tes_shader.cso) { + key.u.tess = 1; + if (!sctx->tess_rings) { si_init_tess_factor_ring(sctx); if (!sctx->tess_rings) return false; } /* VS as LS */ if (sctx->chip_class <= GFX8) { r = si_shader_select(ctx, &sctx->vs_shader, &compiler_state); @@ -3443,35 +3452,37 @@ bool si_update_shaders(struct si_context *sctx) r = si_shader_select(ctx, &sctx->vs_shader, &compiler_state); if (r) return false; si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4); si_pm4_bind_state(sctx, ls, NULL); si_pm4_bind_state(sctx, hs, NULL); } /* Update GS. 
*/ if (sctx->gs_shader.cso) { + key.u.gs = 1; + r = si_shader_select(ctx, &sctx->gs_shader, &compiler_state); if (r) return false; si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4); si_pm4_bind_state(sctx, vs, sctx->gs_shader.cso->gs_copy_shader->pm4); if (!si_update_gs_ring_buffers(sctx)) return false; } else { si_pm4_bind_state(sctx, gs, NULL); if (sctx->chip_class <= GFX8) si_pm4_bind_state(sctx, es, NULL); } - si_update_vgt_shader_config(sctx); + si_update_vgt_shader_config(sctx, key); if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable) si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs); if (sctx->ps_shader.cso) { unsigned db_shader_control; r = si_shader_select(ctx, &sctx->ps_shader, &compiler_state); if (r) return false; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev