On 15.02.2016 18:59, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/radeonsi/si_pipe.c          |   5 +-
  src/gallium/drivers/radeonsi/si_pipe.h          |  16 ++
  src/gallium/drivers/radeonsi/si_shader.h        |   4 +-
  src/gallium/drivers/radeonsi/si_state.h         |   2 +
  src/gallium/drivers/radeonsi/si_state_shaders.c | 234 +++++++++++++++++++++++-
  5 files changed, 254 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 75d4775..a576237 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -563,7 +563,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
                }
        }
        pipe_mutex_destroy(sscreen->shader_parts_mutex);
-
+       si_destroy_shader_cache(sscreen);
        r600_destroy_common_screen(&sscreen->b);
  }

@@ -611,7 +611,8 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)
        sscreen->b.b.resource_create = r600_resource_create_common;

        if (!r600_common_screen_init(&sscreen->b, ws) ||
-           !si_init_gs_info(sscreen)) {
+           !si_init_gs_info(sscreen) ||
+           !si_init_shader_cache(sscreen)) {
                FREE(sscreen);
                return NULL;
        }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 1ac7bc4..ef860a5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -80,6 +80,7 @@
  #define SI_MAX_BORDER_COLORS  4096

  struct si_compute;
+struct hash_table;

  struct si_screen {
        struct r600_common_screen       b;
@@ -94,6 +95,21 @@ struct si_screen {
        struct si_shader_part           *tcs_epilogs;
        struct si_shader_part           *ps_prologs;
        struct si_shader_part           *ps_epilogs;
+
+       /* Shader cache in memory.
+        *
+        * Design & limitations:
+        * - The shader cache is per screen (= per process), never saved to
+        *   disk, and skips redundant shader compilations from TGSI to 
bytecode.
+        * - It can only be used with one-variant-per-shader support, in which
+        *   case only the main (typically middle) part of shaders is cached.
+        * - Only VS, TCS, TES, PS are cached, out of which only the hw VS
+        *   variants of VS and TES are cached, so LS and ES aren't.
+        * - GS and CS aren't cached, but it's certainly possible to cache
+        *   those as well.
+        */
+       pipe_mutex                      shader_cache_mutex;
+       struct hash_table               *shader_cache;
  };

  struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 48e048d..7e46871 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -362,8 +362,10 @@ struct si_shader {
        struct r600_resource            *bo;
        struct r600_resource            *scratch_bo;
        union si_shader_key             key;
-       struct radeon_shader_binary     binary;
        bool                            is_binary_shared;
+
+       /* The following data is all that's needed for binary shaders. */
+       struct radeon_shader_binary     binary;
        struct si_shader_config         config;
        struct si_shader_info           info;
  };
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index f64c4d4..40792cb 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
  /* si_state_shader.c */
  bool si_update_shaders(struct si_context *sctx);
  void si_init_shader_functions(struct si_context *sctx);
+bool si_init_shader_cache(struct si_screen *sscreen);
+void si_destroy_shader_cache(struct si_screen *sscreen);

  /* si_state_draw.c */
  void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index c62cbb7..bc3e5be 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -32,10 +32,217 @@

  #include "tgsi/tgsi_parse.h"
  #include "tgsi/tgsi_ureg.h"
+#include "util/hash_table.h"
+#include "util/u_hash.h"
  #include "util/u_memory.h"
  #include "util/u_prim.h"
  #include "util/u_simple_shaders.h"

+/* SHADER_CACHE */
+
+/**
+ * Return the TGSI binary in a buffer. The first 4 bytes contain its size as
+ * integer.
+ */
+static void *si_get_tgsi_binary(struct si_shader_selector *sel)
+{
+       unsigned tgsi_size = tgsi_num_tokens(sel->tokens) *
+                            sizeof(struct tgsi_token);
+       unsigned size = 4 + tgsi_size + sizeof(sel->so);
+       char *result = (char*)MALLOC(size);
+
+       if (!result)
+               return NULL;
+
+       *((uint32_t*)result) = size;
+       memcpy(result + 4, sel->tokens, tgsi_size);
+       memcpy(result + 4 + tgsi_size, &sel->so, sizeof(sel->so));
+       return result;
+}
+
+/** Copy "data" to "ptr" and return the next dword following copied data. */
+static uint32_t *write_data(uint32_t *ptr, const void *data, unsigned size)
+{
+       memcpy(ptr, data, size);
+       ptr += DIV_ROUND_UP(size, 4);
+       return ptr;
+}
+
+/** Read data from "ptr". Return the next dword following the data. */
+static uint32_t *read_data(uint32_t *ptr, void *data, unsigned size)
+{
+       memcpy(data, ptr, size);
+       ptr += DIV_ROUND_UP(size, 4);
+       return ptr;
+}
+
+/**
+ * Write the size as uint followed by the data. Return the next dword
+ * following the copied data.
+ */
+static uint32_t *write_chunk(uint32_t *ptr, const void *data, unsigned size)
+{
+       *ptr++ = size;
+       return write_data(ptr, data, size);
+}
+
+/**
+ * Read the size as uint followed by the data. Return both via parameters.
+ * Return the next dword following the data.
+ */
+static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
+{
+       *size = *ptr++;
+       assert(*data == NULL);
+       *data = malloc(*size);
+       return read_data(ptr, *data, *size);
+}
+
+/**
+ * Return the shader binary in a buffer. The first 4 bytes contain its size
+ * as integer.
+ */
+static void *si_get_shader_binary(struct si_shader *shader)
+{
+       /* There is always a size of data followed by the data itself. */
+       unsigned relocs_size = shader->binary.reloc_count *
+                              sizeof(shader->binary.relocs[0]);
+       unsigned disasm_size = strlen(shader->binary.disasm_string) + 1;
+       unsigned size =
+               4 + /* total size */
+               4 + /* CRC32 of the data below */
+               align(sizeof(shader->config), 4) +
+               align(sizeof(shader->info), 4) +
+               4 + align(shader->binary.code_size, 4) +
+               4 + align(shader->binary.rodata_size, 4) +
+               4 + align(relocs_size, 4) +
+               4 + align(disasm_size, 4);
+       void *buffer = CALLOC(1, size);
+       uint32_t *ptr = (uint32_t*)buffer;
+
+       if (!buffer)
+               return NULL;
+
+       *ptr++ = size;
+       ptr++; /* CRC32 is calculated at the end. */
+
+       ptr = write_data(ptr, &shader->config, sizeof(shader->config));
+       ptr = write_data(ptr, &shader->info, sizeof(shader->info));
+       ptr = write_chunk(ptr, shader->binary.code, shader->binary.code_size);
+       ptr = write_chunk(ptr, shader->binary.rodata, 
shader->binary.rodata_size);
+       ptr = write_chunk(ptr, shader->binary.relocs, relocs_size);
+       ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size);

Suggestion: add assert((char *)ptr - (char *)buffer == size); here, to verify that the precomputed size matches the number of bytes actually written.

+
+       /* Compute CRC32. */
+       ptr = (uint32_t*)buffer;
+       ptr++;
+       *ptr = util_hash_crc32(ptr + 1, size - 8);
+
+       return buffer;
+}
+
+static bool si_load_shader_binary(struct si_shader *shader, void *binary)
+{
+       uint32_t *ptr = (uint32_t*)binary;
+       uint32_t size = *ptr++;
+       uint32_t crc32 = *ptr++;
+       unsigned chunk_size;
+
+       if (util_hash_crc32(ptr, size - 8) != crc32) {
+               fprintf(stderr, "radeonsi: binary shader has invalid CRC32\n");
+               return false;
+       }
+
+       ptr = read_data(ptr, &shader->config, sizeof(shader->config));
+       ptr = read_data(ptr, &shader->info, sizeof(shader->info));
+       ptr = read_chunk(ptr, (void**)&shader->binary.code,
+                        &shader->binary.code_size);
+       ptr = read_chunk(ptr, (void**)&shader->binary.rodata,
+                        &shader->binary.rodata_size);
+       ptr = read_chunk(ptr, (void**)&shader->binary.relocs, &chunk_size);
+       shader->binary.reloc_count = chunk_size / 
sizeof(shader->binary.relocs[0]);
+       ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string, 
&chunk_size);
+
+       return true;
+}
+
+/**
+ * Insert a shader into the cache. It's assumed the shader is not in the cache.
+ * Use si_shader_cache_load_shader before calling this.
+ *
+ * Returns true if the tgsi_binary should be deleted after this.
+ */
+static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
+                                         void *tgsi_binary,
+                                         struct si_shader *shader)
+{
+       struct hash_entry *entry;
+       void *hw_binary = si_get_shader_binary(shader);
+
+       if (!hw_binary)
+               return true;
+
+       entry = _mesa_hash_table_insert(sscreen->shader_cache, tgsi_binary,
+                                       hw_binary);
+       return entry->key != tgsi_binary;

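_mesa_hash_table_insert can return NULL, indicating that it didn't insert anything. In that case, entry->key above dereferences a NULL pointer, so the return value needs to be checked first.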

If the insertion is successful, entry->key == tgsi_binary will always hold, so comparing entry->key against tgsi_binary is unnecessary; only the NULL check is needed.

Nicolai

+}
+
+static bool si_shader_cache_load_shader(struct si_screen *sscreen,
+                                       void *tgsi_binary,
+                                       struct si_shader *shader)
+{
+       struct hash_entry *entry =
+               _mesa_hash_table_search(sscreen->shader_cache, tgsi_binary);
+       if (!entry)
+               return false;
+
+       return si_load_shader_binary(shader, entry->data);
+}
+
+static uint32_t si_shader_cache_key_hash(const void *key)
+{
+       /* The first dword is the key size. */
+       return util_hash_crc32(key, *(uint32_t*)key);
+}
+
+static bool si_shader_cache_key_equals(const void *a, const void *b)
+{
+       uint32_t *keya = (uint32_t*)a;
+       uint32_t *keyb = (uint32_t*)b;
+
+       /* The first dword is the key size. */
+       if (*keya != *keyb)
+               return false;
+
+       return memcmp(keya, keyb, *keya) == 0;
+}
+
+static void si_destroy_shader_cache_entry(struct hash_entry *entry)
+{
+       FREE((void*)entry->key);
+       FREE(entry->data);
+}
+
+bool si_init_shader_cache(struct si_screen *sscreen)
+{
+       pipe_mutex_init(sscreen->shader_cache_mutex);
+       sscreen->shader_cache =
+               _mesa_hash_table_create(NULL,
+                                       si_shader_cache_key_hash,
+                                       si_shader_cache_key_equals);
+       return sscreen->shader_cache != NULL;
+}
+
+void si_destroy_shader_cache(struct si_screen *sscreen)
+{
+       if (sscreen->shader_cache)
+               _mesa_hash_table_destroy(sscreen->shader_cache,
+                                        si_destroy_shader_cache_entry);
+       pipe_mutex_destroy(sscreen->shader_cache_mutex);
+}
+
+/* SHADER STATES */
+
  static void si_set_tesseval_regs(struct si_shader *shader,
                                 struct si_pm4_state *pm4)
  {
@@ -936,17 +1143,36 @@ static void *si_create_shader_selector(struct 
pipe_context *ctx,
        if (sel->type != PIPE_SHADER_GEOMETRY &&
            !sscreen->use_monolithic_shaders) {
                struct si_shader *shader = CALLOC_STRUCT(si_shader);
+               void *tgsi_binary;

                if (!shader)
                        goto error;

                shader->selector = sel;

-               if (si_compile_tgsi_shader(sscreen, sctx->tm, shader, false,
-                                          &sctx->b.debug) != 0) {
-                       FREE(shader);
-                       goto error;
+               tgsi_binary = si_get_tgsi_binary(sel);
+
+               /* Try to load the shader from the shader cache. */
+               pipe_mutex_lock(sscreen->shader_cache_mutex);
+
+               if (tgsi_binary &&
+                   si_shader_cache_load_shader(sscreen, tgsi_binary, shader)) {
+                       FREE(tgsi_binary);
+               } else {
+                       /* Compile the shader if it hasn't been loaded from the 
cache. */
+                       if (si_compile_tgsi_shader(sscreen, sctx->tm, shader, 
false,
+                                                  &sctx->b.debug) != 0) {
+                               FREE(shader);
+                               FREE(tgsi_binary);
+                               pipe_mutex_unlock(sscreen->shader_cache_mutex);
+                               goto error;
+                       }
+
+                       if (si_shader_cache_insert_shader(sscreen, tgsi_binary, 
shader))
+                               FREE(tgsi_binary);
                }
+               pipe_mutex_unlock(sscreen->shader_cache_mutex);
+
                sel->main_shader_part = shader;
        }


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to