From: Marek Olšák <marek.ol...@amd.com>

On GFX9, always request a monolithic variant of merged ES-GS shaders, in
addition to the non-monolithic variant.
---
 src/gallium/drivers/radeonsi/si_shader.h        | 10 +++++++++-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 19 +++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index e0227e4..4fb79e6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -438,26 +438,34 @@ struct si_shader_key {
        unsigned as_ls:1; /* local shader, which precedes TCS */
 
        /* Flags for monolithic compilation only. */
        struct {
                /* One byte for every input: SI_FIX_FETCH_* enums. */
                uint8_t         vs_fix_fetch[SI_MAX_ATTRIBS];
                uint64_t        ff_tcs_inputs_to_copy; /* for fixed-func TCS */
        } mono;
 
        /* Optimization flags for asynchronous compilation only. */
-       union {
+       struct {
                struct {
                        uint64_t        kill_outputs; /* "get_unique_index" bits */
                        uint32_t        kill_outputs2; /* "get_unique_index2" bits */
                        unsigned        clip_disable:1;
                } hw_vs; /* HW VS (it can be VS, TES, GS) */
+
+               /* For shaders where monolithic variants have better code.
+                *
+                * This is a flag that has no effect on code generation,
+                * but forces monolithic shaders to be used as soon as
+                * possible, because it's in the "opt" group.
+                */
+               unsigned        prefer_mono:1;
        } opt;
 };
 
 struct si_shader_config {
        unsigned                        num_sgprs;
        unsigned                        num_vgprs;
        unsigned                        spilled_sgprs;
        unsigned                        spilled_vgprs;
        unsigned                        private_mem_vgprs;
        unsigned                        lds_size;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 6bb3f50..22bf3cf 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1271,20 +1271,39 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                break;
        case PIPE_SHADER_GEOMETRY:
                if (sctx->b.chip_class >= GFX9) {
                        if (sctx->tes_shader.cso) {
                                key->part.gs.es = sctx->tes_shader.cso;
                        } else {
                                si_shader_selector_key_vs(sctx, sctx->vs_shader.cso,
                                                          key, &key->part.gs.vs_prolog);
                                key->part.gs.es = sctx->vs_shader.cso;
                        }
+
+                       /* Merged ES-GS can have unbalanced wave usage.
+                        *
+                        * ES threads are per-vertex, while GS threads are
+                        * per-primitive. So without any amplification, there
+                        * are fewer GS threads than ES threads, which can result
+                        * in empty (no-op) GS waves. With too much amplification,
+                        * there are more GS threads than ES threads, which
+                        * can result in empty (no-op) ES waves.
+                        *
+                        * Non-monolithic shaders are implemented by setting EXEC
+                        * at the beginning of shader parts, and don't jump to
+                        * the end if EXEC is 0.
+                        *
+                        * Monolithic shaders use conditional blocks, so they can
+                        * jump and skip empty waves of ES or GS. So set this to
+                        * always use optimized variants, which are monolithic.
+                        */
+                       key->opt.prefer_mono = 1;
                }
                key->part.gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix;
                break;
        case PIPE_SHADER_FRAGMENT: {
                struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
                struct si_state_blend *blend = sctx->queued.named.blend;
 
                if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
                    sel->info.colors_written == 0x1)
                        key->part.ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to