From: Marek Olšák <marek.ol...@amd.com>

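Keep a separate main shader part for the LS and ES variants of a shader
and compile the missing one when a variant needs it. This can reduce
on-demand compilation of monolithic shaders when the initial VS/LS/ES
determination is wrong.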
---
 src/gallium/drivers/radeonsi/si_shader.c        |  2 +-
 src/gallium/drivers/radeonsi/si_shader.h        | 16 +++++++++++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 36 ++++++++++++++++++++++---
 3 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 1829e3e..de42778 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -8233,21 +8233,21 @@ static void si_fix_resource_usage(struct si_screen *sscreen,
                si_multiwave_lds_size_workaround(sscreen,
                                                 &shader->config.lds_size);
        }
 }
 
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug)
 {
        struct si_shader_selector *sel = shader->selector;
-       struct si_shader *mainp = sel->main_shader_part;
+       struct si_shader *mainp = *si_get_main_shader_part(sel, &shader->key);
        int r;
 
        /* LS, ES, VS are compiled on demand if the main part hasn't been
         * compiled for that stage.
         *
         * Vertex shaders are compiled on demand when a vertex fetch
         * workaround must be applied.
         */
        if (shader->is_monolithic) {
                /* Monolithic shader (compiled as a whole, has many variants,
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index da88df0..d4b57c9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -280,20 +280,22 @@ struct si_shader_selector {
        struct si_compiler_ctx_state compiler_ctx_state;
 
        pipe_mutex              mutex;
        struct si_shader        *first_variant; /* immutable after the first variant */
        struct si_shader        *last_variant; /* mutable */
 
        /* The compiled TGSI shader expecting a prolog and/or epilog (not
         * uploaded to a buffer).
         */
        struct si_shader        *main_shader_part;
+       struct si_shader        *main_shader_part_ls; /* as_ls is set in the key */
+       struct si_shader        *main_shader_part_es; /* as_es is set in the key */
 
        struct si_shader        *gs_copy_shader;
 
        struct tgsi_token       *tokens;
        struct pipe_stream_output_info  so;
        struct tgsi_shader_info         info;
 
        /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
        unsigned        type;
 
@@ -578,11 +580,25 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
                        struct si_shader *shader,
                        struct si_shader_config *config,
                        uint64_t scratch_va);
 void si_shader_binary_read_config(struct radeon_shader_binary *binary,
                                  struct si_shader_config *conf,
                                  unsigned symbol_offset);
 unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
                                    bool writes_samplemask);
 const char *si_get_shader_name(struct si_shader *shader, unsigned processor);
 
+/* Inline helpers. */
+
+/* Return the address of the selector's main shader part slot for this key. */
+static inline struct si_shader **
+si_get_main_shader_part(struct si_shader_selector *sel,
+                       struct si_shader_key *key)
+{
+       if (key->as_ls) /* checked first, so as_ls wins over as_es */
+               return &sel->main_shader_part_ls;
+       if (key->as_es)
+               return &sel->main_shader_part_es;
+       return &sel->main_shader_part; /* neither as_ls nor as_es is set */
+}
+
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 3630911..179176c 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1229,29 +1229,53 @@ again:
        /* Build a new shader. */
        shader = CALLOC_STRUCT(si_shader);
        if (!shader) {
                pipe_mutex_unlock(sel->mutex);
                return -ENOMEM;
        }
        shader->selector = sel;
        shader->key = *key;
        shader->compiler_ctx_state = *compiler_state;
 
+       /* Compile the main shader part if it doesn't exist. This can happen
+        * if the initial guess was wrong. */
+       struct si_shader **mainp = si_get_main_shader_part(sel, key);
        bool is_pure_monolithic =
                memcmp(&key->mono, &zeroed.mono, sizeof(key->mono)) != 0;
 
+       if (!*mainp && !is_pure_monolithic) {
+               struct si_shader *main_part = CALLOC_STRUCT(si_shader);
+
+               if (!main_part) {
+                       FREE(shader);
+                       pipe_mutex_unlock(sel->mutex);
+                       return -ENOMEM; /* skip the draw call */
+               }
+
+               main_part->selector = sel;
+               main_part->key.as_es = key->as_es;
+               main_part->key.as_ls = key->as_ls;
+
+               if (si_compile_tgsi_shader(sscreen, compiler_state->tm,
+                                          main_part, false,
+                                          &compiler_state->debug) != 0) {
+                       FREE(main_part);
+                       FREE(shader);
+                       pipe_mutex_unlock(sel->mutex);
+                       return -ENOMEM; /* skip the draw call */
+               }
+               *mainp = main_part;
+       }
+
        /* Monolithic-only shaders don't make a distinction between optimized
         * and unoptimized. */
        shader->is_monolithic =
-               !sel->main_shader_part ||
-               sel->main_shader_part->key.as_ls != key->as_ls ||
-               sel->main_shader_part->key.as_es != key->as_es ||
                is_pure_monolithic ||
                memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
 
        shader->is_optimized =
                !sscreen->use_monolithic_shaders &&
                memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
        if (shader->is_optimized)
                util_queue_fence_init(&shader->optimized_ready);
 
        if (!sel->last_variant) {
@@ -1392,21 +1416,21 @@ void si_init_shader_selector_async(void *job, int thread_index)
                        }
 
                        if (tgsi_binary) {
                                pipe_mutex_lock(sscreen->shader_cache_mutex);
                                if (!si_shader_cache_insert_shader(sscreen, tgsi_binary, shader))
                                        FREE(tgsi_binary);
                                pipe_mutex_unlock(sscreen->shader_cache_mutex);
                        }
                }
 
-               sel->main_shader_part = shader;
+               *si_get_main_shader_part(sel, &shader->key) = shader;
 
                /* Unset "outputs_written" flags for outputs converted to
                 * DEFAULT_VAL, so that later inter-shader optimizations don't
                 * try to eliminate outputs that don't exist in the final
                 * shader.
                 *
                 * This is only done if non-monolithic shaders are enabled.
                 */
                if ((sel->type == PIPE_SHADER_VERTEX ||
                     sel->type == PIPE_SHADER_TESS_EVAL) &&
@@ -1862,20 +1886,24 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
        }
 
        while (p) {
                c = p->next_variant;
                si_delete_shader(sctx, p);
                p = c;
        }
 
        if (sel->main_shader_part)
                si_delete_shader(sctx, sel->main_shader_part);
+       if (sel->main_shader_part_ls)
+               si_delete_shader(sctx, sel->main_shader_part_ls);
+       if (sel->main_shader_part_es)
+               si_delete_shader(sctx, sel->main_shader_part_es);
        if (sel->gs_copy_shader)
                si_delete_shader(sctx, sel->gs_copy_shader);
 
        util_queue_fence_destroy(&sel->ready);
        pipe_mutex_destroy(sel->mutex);
        free(sel->tokens);
        free(sel);
 }
 
 static unsigned si_get_ps_input_cntl(struct si_context *sctx,
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to