A new version of the patch is attached. Please review.

Marek

On Tue, Feb 16, 2016 at 6:02 PM, Nicolai Hähnle <[email protected]> wrote:
> On 15.02.2016 18:59, Marek Olšák wrote:
>>
>> From: Marek Olšák <[email protected]>
>>
>> ---
>>   src/gallium/drivers/radeonsi/si_pipe.c          |   5 +-
>>   src/gallium/drivers/radeonsi/si_pipe.h          |  16 ++
>>   src/gallium/drivers/radeonsi/si_shader.h        |   4 +-
>>   src/gallium/drivers/radeonsi/si_state.h         |   2 +
>>   src/gallium/drivers/radeonsi/si_state_shaders.c | 234
>> +++++++++++++++++++++++-
>>   5 files changed, 254 insertions(+), 7 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
>> b/src/gallium/drivers/radeonsi/si_pipe.c
>> index 75d4775..a576237 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>> @@ -563,7 +563,7 @@ static void si_destroy_screen(struct pipe_screen*
>> pscreen)
>>                 }
>>         }
>>         pipe_mutex_destroy(sscreen->shader_parts_mutex);
>> -
>> +       si_destroy_shader_cache(sscreen);
>>         r600_destroy_common_screen(&sscreen->b);
>>   }
>>
>> @@ -611,7 +611,8 @@ struct pipe_screen *radeonsi_screen_create(struct
>> radeon_winsys *ws)
>>         sscreen->b.b.resource_create = r600_resource_create_common;
>>
>>         if (!r600_common_screen_init(&sscreen->b, ws) ||
>> -           !si_init_gs_info(sscreen)) {
>> +           !si_init_gs_info(sscreen) ||
>> +           !si_init_shader_cache(sscreen)) {
>>                 FREE(sscreen);
>>                 return NULL;
>>         }
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>> b/src/gallium/drivers/radeonsi/si_pipe.h
>> index 1ac7bc4..ef860a5 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>> @@ -80,6 +80,7 @@
>>   #define SI_MAX_BORDER_COLORS  4096
>>
>>   struct si_compute;
>> +struct hash_table;
>>
>>   struct si_screen {
>>         struct r600_common_screen       b;
>> @@ -94,6 +95,21 @@ struct si_screen {
>>         struct si_shader_part           *tcs_epilogs;
>>         struct si_shader_part           *ps_prologs;
>>         struct si_shader_part           *ps_epilogs;
>> +
>> +       /* Shader cache in memory.
>> +        *
>> +        * Design & limitations:
>> +        * - The shader cache is per screen (= per process), never saved
>> to
>> +        *   disk, and skips redundant shader compilations from TGSI to
>> bytecode.
>> +        * - It can only be used with one-variant-per-shader support, in
>> which
>> +        *   case only the main (typically middle) part of shaders is
>> cached.
>> +        * - Only VS, TCS, TES, PS are cached, out of which only the hw VS
>> +        *   variants of VS and TES are cached, so LS and ES aren't.
>> +        * - GS and CS aren't cached, but it's certainly possible to cache
>> +        *   those as well.
>> +        */
>> +       pipe_mutex                      shader_cache_mutex;
>> +       struct hash_table               *shader_cache;
>>   };
>>
>>   struct si_blend_color {
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.h
>> b/src/gallium/drivers/radeonsi/si_shader.h
>> index 48e048d..7e46871 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.h
>> +++ b/src/gallium/drivers/radeonsi/si_shader.h
>> @@ -362,8 +362,10 @@ struct si_shader {
>>         struct r600_resource            *bo;
>>         struct r600_resource            *scratch_bo;
>>         union si_shader_key             key;
>> -       struct radeon_shader_binary     binary;
>>         bool                            is_binary_shared;
>> +
>> +       /* The following data is all that's needed for binary shaders. */
>> +       struct radeon_shader_binary     binary;
>>         struct si_shader_config         config;
>>         struct si_shader_info           info;
>>   };
>> diff --git a/src/gallium/drivers/radeonsi/si_state.h
>> b/src/gallium/drivers/radeonsi/si_state.h
>> index f64c4d4..40792cb 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.h
>> +++ b/src/gallium/drivers/radeonsi/si_state.h
>> @@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context
>> *ctx,
>>   /* si_state_shader.c */
>>   bool si_update_shaders(struct si_context *sctx);
>>   void si_init_shader_functions(struct si_context *sctx);
>> +bool si_init_shader_cache(struct si_screen *sscreen);
>> +void si_destroy_shader_cache(struct si_screen *sscreen);
>>
>>   /* si_state_draw.c */
>>   void si_emit_cache_flush(struct si_context *sctx, struct r600_atom
>> *atom);
>> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
>> b/src/gallium/drivers/radeonsi/si_state_shaders.c
>> index c62cbb7..bc3e5be 100644
>> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
>> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
>> @@ -32,10 +32,217 @@
>>
>>   #include "tgsi/tgsi_parse.h"
>>   #include "tgsi/tgsi_ureg.h"
>> +#include "util/hash_table.h"
>> +#include "util/u_hash.h"
>>   #include "util/u_memory.h"
>>   #include "util/u_prim.h"
>>   #include "util/u_simple_shaders.h"
>>
>> +/* SHADER_CACHE */
>> +
>> +/**
>> + * Return the TGSI binary in a buffer. The first 4 bytes contain its size
>> as
>> + * integer.
>> + */
>> +static void *si_get_tgsi_binary(struct si_shader_selector *sel)
>> +{
>> +       unsigned tgsi_size = tgsi_num_tokens(sel->tokens) *
>> +                            sizeof(struct tgsi_token);
>> +       unsigned size = 4 + tgsi_size + sizeof(sel->so);
>> +       char *result = (char*)MALLOC(size);
>> +
>> +       if (!result)
>> +               return NULL;
>> +
>> +       *((uint32_t*)result) = size;
>> +       memcpy(result + 4, sel->tokens, tgsi_size);
>> +       memcpy(result + 4 + tgsi_size, &sel->so, sizeof(sel->so));
>> +       return result;
>> +}
>> +
>> +/** Copy "data" to "ptr" and return the next dword following copied data.
>> */
>> +static uint32_t *write_data(uint32_t *ptr, const void *data, unsigned
>> size)
>> +{
>> +       memcpy(ptr, data, size);
>> +       ptr += DIV_ROUND_UP(size, 4);
>> +       return ptr;
>> +}
>> +
>> +/** Read data from "ptr". Return the next dword following the data. */
>> +static uint32_t *read_data(uint32_t *ptr, void *data, unsigned size)
>> +{
>> +       memcpy(data, ptr, size);
>> +       ptr += DIV_ROUND_UP(size, 4);
>> +       return ptr;
>> +}
>> +
>> +/**
>> + * Write the size as uint followed by the data. Return the next dword
>> + * following the copied data.
>> + */
>> +static uint32_t *write_chunk(uint32_t *ptr, const void *data, unsigned
>> size)
>> +{
>> +       *ptr++ = size;
>> +       return write_data(ptr, data, size);
>> +}
>> +
>> +/**
>> + * Read the size as uint followed by the data. Return both via
>> parameters.
>> + * Return the next dword following the data.
>> + */
>> +static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
>> +{
>> +       *size = *ptr++;
>> +       assert(*data == NULL);
>> +       *data = malloc(*size);
>> +       return read_data(ptr, *data, *size);
>> +}
>> +
>> +/**
>> + * Return the shader binary in a buffer. The first 4 bytes contain its
>> size
>> + * as integer.
>> + */
>> +static void *si_get_shader_binary(struct si_shader *shader)
>> +{
>> +       /* There is always a size of data followed by the data itself. */
>> +       unsigned relocs_size = shader->binary.reloc_count *
>> +                              sizeof(shader->binary.relocs[0]);
>> +       unsigned disasm_size = strlen(shader->binary.disasm_string) + 1;
>> +       unsigned size =
>> +               4 + /* total size */
>> +               4 + /* CRC32 of the data below */
>> +               align(sizeof(shader->config), 4) +
>> +               align(sizeof(shader->info), 4) +
>> +               4 + align(shader->binary.code_size, 4) +
>> +               4 + align(shader->binary.rodata_size, 4) +
>> +               4 + align(relocs_size, 4) +
>> +               4 + align(disasm_size, 4);
>> +       void *buffer = CALLOC(1, size);
>> +       uint32_t *ptr = (uint32_t*)buffer;
>> +
>> +       if (!buffer)
>> +               return NULL;
>> +
>> +       *ptr++ = size;
>> +       ptr++; /* CRC32 is calculated at the end. */
>> +
>> +       ptr = write_data(ptr, &shader->config, sizeof(shader->config));
>> +       ptr = write_data(ptr, &shader->info, sizeof(shader->info));
>> +       ptr = write_chunk(ptr, shader->binary.code,
>> shader->binary.code_size);
>> +       ptr = write_chunk(ptr, shader->binary.rodata,
>> shader->binary.rodata_size);
>> +       ptr = write_chunk(ptr, shader->binary.relocs, relocs_size);
>> +       ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size);
>
>
> Suggestion: assert((char *)ptr - (char *)buffer == size);
>
>
>> +
>> +       /* Compute CRC32. */
>> +       ptr = (uint32_t*)buffer;
>> +       ptr++;
>> +       *ptr = util_hash_crc32(ptr + 1, size - 8);
>> +
>> +       return buffer;
>> +}
>> +
>> +static bool si_load_shader_binary(struct si_shader *shader, void *binary)
>> +{
>> +       uint32_t *ptr = (uint32_t*)binary;
>> +       uint32_t size = *ptr++;
>> +       uint32_t crc32 = *ptr++;
>> +       unsigned chunk_size;
>> +
>> +       if (util_hash_crc32(ptr, size - 8) != crc32) {
>> +               fprintf(stderr, "radeonsi: binary shader has invalid
>> CRC32\n");
>> +               return false;
>> +       }
>> +
>> +       ptr = read_data(ptr, &shader->config, sizeof(shader->config));
>> +       ptr = read_data(ptr, &shader->info, sizeof(shader->info));
>> +       ptr = read_chunk(ptr, (void**)&shader->binary.code,
>> +                        &shader->binary.code_size);
>> +       ptr = read_chunk(ptr, (void**)&shader->binary.rodata,
>> +                        &shader->binary.rodata_size);
>> +       ptr = read_chunk(ptr, (void**)&shader->binary.relocs,
>> &chunk_size);
>> +       shader->binary.reloc_count = chunk_size /
>> sizeof(shader->binary.relocs[0]);
>> +       ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string,
>> &chunk_size);
>> +
>> +       return true;
>> +}
>> +
>> +/**
>> + * Insert a shader into the cache. It's assumed the shader is not in the
>> cache.
>> + * Use si_shader_cache_load_shader before calling this.
>> + *
>> + * Returns true if the tgsi_binary should be deleted after this.
>> + */
>> +static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
>> +                                         void *tgsi_binary,
>> +                                         struct si_shader *shader)
>> +{
>> +       struct hash_entry *entry;
>> +       void *hw_binary = si_get_shader_binary(shader);
>> +
>> +       if (!hw_binary)
>> +               return true;
>> +
>> +       entry = _mesa_hash_table_insert(sscreen->shader_cache,
>> tgsi_binary,
>> +                                       hw_binary);
>> +       return entry->key != tgsi_binary;
>
>
> _mesa_hash_table_insert can return NULL, indicating that it didn't insert
> anything.
>
> If it is successful, entry->key == tgsi_binary will always hold, so the
> second check is unnecessary.
>
> Nicolai
>
>
>> +}
>> +
>> +static bool si_shader_cache_load_shader(struct si_screen *sscreen,
>> +                                       void *tgsi_binary,
>> +                                       struct si_shader *shader)
>> +{
>> +       struct hash_entry *entry =
>> +               _mesa_hash_table_search(sscreen->shader_cache,
>> tgsi_binary);
>> +       if (!entry)
>> +               return false;
>> +
>> +       return si_load_shader_binary(shader, entry->data);
>> +}
>> +
>> +static uint32_t si_shader_cache_key_hash(const void *key)
>> +{
>> +       /* The first dword is the key size. */
>> +       return util_hash_crc32(key, *(uint32_t*)key);
>> +}
>> +
>> +static bool si_shader_cache_key_equals(const void *a, const void *b)
>> +{
>> +       uint32_t *keya = (uint32_t*)a;
>> +       uint32_t *keyb = (uint32_t*)b;
>> +
>> +       /* The first dword is the key size. */
>> +       if (*keya != *keyb)
>> +               return false;
>> +
>> +       return memcmp(keya, keyb, *keya) == 0;
>> +}
>> +
>> +static void si_destroy_shader_cache_entry(struct hash_entry *entry)
>> +{
>> +       FREE((void*)entry->key);
>> +       FREE(entry->data);
>> +}
>> +
>> +bool si_init_shader_cache(struct si_screen *sscreen)
>> +{
>> +       pipe_mutex_init(sscreen->shader_cache_mutex);
>> +       sscreen->shader_cache =
>> +               _mesa_hash_table_create(NULL,
>> +                                       si_shader_cache_key_hash,
>> +                                       si_shader_cache_key_equals);
>> +       return sscreen->shader_cache != NULL;
>> +}
>> +
>> +void si_destroy_shader_cache(struct si_screen *sscreen)
>> +{
>> +       if (sscreen->shader_cache)
>> +               _mesa_hash_table_destroy(sscreen->shader_cache,
>> +                                        si_destroy_shader_cache_entry);
>> +       pipe_mutex_destroy(sscreen->shader_cache_mutex);
>> +}
>> +
>> +/* SHADER STATES */
>> +
>>   static void si_set_tesseval_regs(struct si_shader *shader,
>>                                  struct si_pm4_state *pm4)
>>   {
>> @@ -936,17 +1143,36 @@ static void *si_create_shader_selector(struct
>> pipe_context *ctx,
>>         if (sel->type != PIPE_SHADER_GEOMETRY &&
>>             !sscreen->use_monolithic_shaders) {
>>                 struct si_shader *shader = CALLOC_STRUCT(si_shader);
>> +               void *tgsi_binary;
>>
>>                 if (!shader)
>>                         goto error;
>>
>>                 shader->selector = sel;
>>
>> -               if (si_compile_tgsi_shader(sscreen, sctx->tm, shader,
>> false,
>> -                                          &sctx->b.debug) != 0) {
>> -                       FREE(shader);
>> -                       goto error;
>> +               tgsi_binary = si_get_tgsi_binary(sel);
>> +
>> +               /* Try to load the shader from the shader cache. */
>> +               pipe_mutex_lock(sscreen->shader_cache_mutex);
>> +
>> +               if (tgsi_binary &&
>> +                   si_shader_cache_load_shader(sscreen, tgsi_binary,
>> shader)) {
>> +                       FREE(tgsi_binary);
>> +               } else {
>> +                       /* Compile the shader if it hasn't been loaded
>> from the cache. */
>> +                       if (si_compile_tgsi_shader(sscreen, sctx->tm,
>> shader, false,
>> +                                                  &sctx->b.debug) != 0) {
>> +                               FREE(shader);
>> +                               FREE(tgsi_binary);
>> +
>> pipe_mutex_unlock(sscreen->shader_cache_mutex);
>> +                               goto error;
>> +                       }
>> +
>> +                       if (si_shader_cache_insert_shader(sscreen,
>> tgsi_binary, shader))
>> +                               FREE(tgsi_binary);
>>                 }
>> +               pipe_mutex_unlock(sscreen->shader_cache_mutex);
>> +
>>                 sel->main_shader_part = shader;
>>         }
>>
>>
>
From 016af534b9a16bad0b8becacbbca9a378d1f25a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <[email protected]>
Date: Thu, 11 Feb 2016 15:49:34 +0100
Subject: [PATCH] radeonsi: implement binary shaders & shader cache in memory
 (v2)

v2: handle _mesa_hash_table_insert failure
    other cosmetic changes
---
 src/gallium/drivers/radeonsi/si_pipe.c          |   5 +-
 src/gallium/drivers/radeonsi/si_pipe.h          |  16 ++
 src/gallium/drivers/radeonsi/si_shader.h        |   4 +-
 src/gallium/drivers/radeonsi/si_state.h         |   2 +
 src/gallium/drivers/radeonsi/si_state_shaders.c | 239 +++++++++++++++++++++++-
 5 files changed, 259 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 30f3ec0..37fd4a2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -564,7 +564,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
 		}
 	}
 	pipe_mutex_destroy(sscreen->shader_parts_mutex);
-
+	si_destroy_shader_cache(sscreen);
 	r600_destroy_common_screen(&sscreen->b);
 }
 
@@ -612,7 +612,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 	sscreen->b.b.resource_create = r600_resource_create_common;
 
 	if (!r600_common_screen_init(&sscreen->b, ws) ||
-	    !si_init_gs_info(sscreen)) {
+	    !si_init_gs_info(sscreen) ||
+	    !si_init_shader_cache(sscreen)) {
 		FREE(sscreen);
 		return NULL;
 	}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 1ac7bc4..ef860a5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -80,6 +80,7 @@
 #define SI_MAX_BORDER_COLORS	4096
 
 struct si_compute;
+struct hash_table;
 
 struct si_screen {
 	struct r600_common_screen	b;
@@ -94,6 +95,21 @@ struct si_screen {
 	struct si_shader_part		*tcs_epilogs;
 	struct si_shader_part		*ps_prologs;
 	struct si_shader_part		*ps_epilogs;
+
+	/* Shader cache in memory.
+	 *
+	 * Design & limitations:
+	 * - The shader cache is per screen (= per process), never saved to
+	 *   disk, and skips redundant shader compilations from TGSI to bytecode.
+	 * - It can only be used with one-variant-per-shader support, in which
+	 *   case only the main (typically middle) part of shaders is cached.
+	 * - Only VS, TCS, TES, PS are cached, out of which only the hw VS
+	 *   variants of VS and TES are cached, so LS and ES aren't.
+	 * - GS and CS aren't cached, but it's certainly possible to cache
+	 *   those as well.
+	 */
+	pipe_mutex			shader_cache_mutex;
+	struct hash_table		*shader_cache;
 };
 
 struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index b299b7b..ff5c24d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -364,8 +364,10 @@ struct si_shader {
 	struct r600_resource		*bo;
 	struct r600_resource		*scratch_bo;
 	union si_shader_key		key;
-	struct radeon_shader_binary	binary;
 	bool				is_binary_shared;
+
+	/* The following data is all that's needed for binary shaders. */
+	struct radeon_shader_binary	binary;
 	struct si_shader_config		config;
 	struct si_shader_info		info;
 };
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index f64c4d4..40792cb 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
 /* si_state_shader.c */
 bool si_update_shaders(struct si_context *sctx);
 void si_init_shader_functions(struct si_context *sctx);
+bool si_init_shader_cache(struct si_screen *sscreen);
+void si_destroy_shader_cache(struct si_screen *sscreen);
 
 /* si_state_draw.c */
 void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index c62cbb7..a6753a7 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -32,10 +32,221 @@
 
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_ureg.h"
+#include "util/hash_table.h"
+#include "util/u_hash.h"
 #include "util/u_memory.h"
 #include "util/u_prim.h"
 #include "util/u_simple_shaders.h"
 
+/* SHADER_CACHE */
+
+/**
+ * Return the TGSI tokens followed by a copy of sel->so in one buffer. The
+ * first 4 bytes contain the total buffer size as integer.
+ */
+static void *si_get_tgsi_binary(struct si_shader_selector *sel)
+{
+	unsigned tgsi_size = tgsi_num_tokens(sel->tokens) *
+			     sizeof(struct tgsi_token);
+	unsigned size = 4 + tgsi_size + sizeof(sel->so);
+	char *result = (char*)MALLOC(size);
+
+	if (!result)
+		return NULL;
+
+	*((uint32_t*)result) = size;
+	memcpy(result + 4, sel->tokens, tgsi_size);
+	memcpy(result + 4 + tgsi_size, &sel->so, sizeof(sel->so));
+	return result;
+}
+
+/** Copy "data" to "ptr" and return the next dword following copied data. */
+static uint32_t *write_data(uint32_t *ptr, const void *data, unsigned size)
+{
+	memcpy(ptr, data, size);
+	ptr += DIV_ROUND_UP(size, 4);
+	return ptr;
+}
+
+/** Read data from "ptr". Return the next dword following the data. */
+static uint32_t *read_data(uint32_t *ptr, void *data, unsigned size)
+{
+	memcpy(data, ptr, size);
+	ptr += DIV_ROUND_UP(size, 4);
+	return ptr;
+}
+
+/**
+ * Write the size as uint followed by the data. Return the next dword
+ * following the copied data.
+ */
+static uint32_t *write_chunk(uint32_t *ptr, const void *data, unsigned size)
+{
+	*ptr++ = size;
+	return write_data(ptr, data, size);
+}
+
+/**
+ * Read the size as uint followed by the data into a newly malloc'd buffer.
+ * Return both via parameters and return the next dword following the data.
+ */
+static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
+{
+	*size = *ptr++;
+	assert(*data == NULL);
+	*data = malloc(*size);
+	return read_data(ptr, *data, *size);
+}
+
+/**
+ * Return the shader binary in a buffer. The first 4 bytes contain its size
+ * as integer, the next 4 bytes the CRC32 of all data that follows.
+ */
+static void *si_get_shader_binary(struct si_shader *shader)
+{
+	/* There is always a size of data followed by the data itself. */
+	unsigned relocs_size = shader->binary.reloc_count *
+			       sizeof(shader->binary.relocs[0]);
+	unsigned disasm_size = strlen(shader->binary.disasm_string) + 1;
+	unsigned size =
+		4 + /* total size */
+		4 + /* CRC32 of the data below */
+		align(sizeof(shader->config), 4) +
+		align(sizeof(shader->info), 4) +
+		4 + align(shader->binary.code_size, 4) +
+		4 + align(shader->binary.rodata_size, 4) +
+		4 + align(relocs_size, 4) +
+		4 + align(disasm_size, 4);
+	void *buffer = CALLOC(1, size);
+	uint32_t *ptr = (uint32_t*)buffer;
+
+	if (!buffer)
+		return NULL;
+
+	*ptr++ = size;
+	ptr++; /* CRC32 is calculated at the end. */
+
+	ptr = write_data(ptr, &shader->config, sizeof(shader->config));
+	ptr = write_data(ptr, &shader->info, sizeof(shader->info));
+	ptr = write_chunk(ptr, shader->binary.code, shader->binary.code_size);
+	ptr = write_chunk(ptr, shader->binary.rodata, shader->binary.rodata_size);
+	ptr = write_chunk(ptr, shader->binary.relocs, relocs_size);
+	ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size);
+	assert((char *)ptr - (char *)buffer == size);
+
+	/* Compute CRC32. */
+	ptr = (uint32_t*)buffer;
+	ptr++;
+	*ptr = util_hash_crc32(ptr + 1, size - 8);
+
+	return buffer;
+}
+
+static bool si_load_shader_binary(struct si_shader *shader, void *binary)
+{
+	uint32_t *ptr = (uint32_t*)binary;
+	uint32_t size = *ptr++;
+	uint32_t crc32 = *ptr++;
+	unsigned chunk_size;
+
+	if (util_hash_crc32(ptr, size - 8) != crc32) {
+		fprintf(stderr, "radeonsi: binary shader has invalid CRC32\n");
+		return false;
+	}
+
+	ptr = read_data(ptr, &shader->config, sizeof(shader->config));
+	ptr = read_data(ptr, &shader->info, sizeof(shader->info));
+	ptr = read_chunk(ptr, (void**)&shader->binary.code,
+			 &shader->binary.code_size);
+	ptr = read_chunk(ptr, (void**)&shader->binary.rodata,
+			 &shader->binary.rodata_size);
+	ptr = read_chunk(ptr, (void**)&shader->binary.relocs, &chunk_size);
+	shader->binary.reloc_count = chunk_size / sizeof(shader->binary.relocs[0]);
+	ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string, &chunk_size);
+
+	return true;
+}
+
+/**
+ * Insert a shader into the cache. It's assumed the shader is not in the cache.
+ * Use si_shader_cache_load_shader before calling this.
+ * On success the cache keeps tgsi_binary as the hash key and frees it later.
+ * Returns false on failure, in which case the tgsi_binary should be freed.
+ */
+static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
+					  void *tgsi_binary,
+					  struct si_shader *shader)
+{
+	void *hw_binary = si_get_shader_binary(shader);
+
+	if (!hw_binary)
+		return false;
+
+	if (_mesa_hash_table_insert(sscreen->shader_cache, tgsi_binary,
+				    hw_binary) == NULL) {
+		FREE(hw_binary);
+		return false;
+	}
+
+	return true;
+}
+
+static bool si_shader_cache_load_shader(struct si_screen *sscreen,
+					void *tgsi_binary,
+				        struct si_shader *shader)
+{
+	struct hash_entry *entry =
+		_mesa_hash_table_search(sscreen->shader_cache, tgsi_binary);
+	if (!entry)
+		return false;
+
+	return si_load_shader_binary(shader, entry->data);
+}
+
+static uint32_t si_shader_cache_key_hash(const void *key)
+{
+	/* The first dword is the key size. */
+	return util_hash_crc32(key, *(uint32_t*)key);
+}
+
+static bool si_shader_cache_key_equals(const void *a, const void *b)
+{
+	uint32_t *keya = (uint32_t*)a;
+	uint32_t *keyb = (uint32_t*)b;
+
+	/* The first dword is the key size. */
+	if (*keya != *keyb)
+		return false;
+
+	return memcmp(keya, keyb, *keya) == 0;
+}
+
+static void si_destroy_shader_cache_entry(struct hash_entry *entry)
+{
+	FREE((void*)entry->key);
+	FREE(entry->data);
+}
+
+bool si_init_shader_cache(struct si_screen *sscreen)
+{
+	pipe_mutex_init(sscreen->shader_cache_mutex);
+	sscreen->shader_cache =
+		_mesa_hash_table_create(NULL,
+					si_shader_cache_key_hash,
+					si_shader_cache_key_equals);
+	return sscreen->shader_cache != NULL;
+}
+
+void si_destroy_shader_cache(struct si_screen *sscreen)
+{
+	if (sscreen->shader_cache)
+		_mesa_hash_table_destroy(sscreen->shader_cache,
+					 si_destroy_shader_cache_entry);
+	pipe_mutex_destroy(sscreen->shader_cache_mutex);
+}
+
+/* SHADER STATES */
+
 static void si_set_tesseval_regs(struct si_shader *shader,
 				 struct si_pm4_state *pm4)
 {
@@ -936,17 +1147,37 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 	if (sel->type != PIPE_SHADER_GEOMETRY &&
 	    !sscreen->use_monolithic_shaders) {
 		struct si_shader *shader = CALLOC_STRUCT(si_shader);
+		void *tgsi_binary;
 
 		if (!shader)
 			goto error;
 
 		shader->selector = sel;
 
-		if (si_compile_tgsi_shader(sscreen, sctx->tm, shader, false,
-					   &sctx->b.debug) != 0) {
-			FREE(shader);
-			goto error;
+		tgsi_binary = si_get_tgsi_binary(sel);
+
+		/* Try to load the shader from the shader cache. */
+		pipe_mutex_lock(sscreen->shader_cache_mutex);
+
+		if (tgsi_binary &&
+		    si_shader_cache_load_shader(sscreen, tgsi_binary, shader)) {
+			FREE(tgsi_binary);
+		} else {
+			/* Compile the shader if it hasn't been loaded from the cache. */
+			if (si_compile_tgsi_shader(sscreen, sctx->tm, shader, false,
+						   &sctx->b.debug) != 0) {
+				FREE(shader);
+				FREE(tgsi_binary);
+				pipe_mutex_unlock(sscreen->shader_cache_mutex);
+				goto error;
+			}
+
+			if (tgsi_binary &&
+			    !si_shader_cache_insert_shader(sscreen, tgsi_binary, shader))
+				FREE(tgsi_binary);
 		}
+		pipe_mutex_unlock(sscreen->shader_cache_mutex);
+
 		sel->main_shader_part = shader;
 	}
 
-- 
2.5.0

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to