Implements a tgsi cache for the OpenGL state tracker.

V2: add support for compute shaders
V3: fall back to a full compile when the TGSI cache item is missing,
    release the key string and cache buffer correctly on the fallback
    path, and compare the whole sha1 when detecting ff shaders
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 267 +++++++++++++++++++++++++++++
 src/mesa/state_tracker/st_program.c        | 147 ++++++++++++++++-
 2 files changed, 409 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 630f5af..b485776 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -32,6 +32,7 @@
 
 #include "st_glsl_to_tgsi.h"
 
+#include "compiler/glsl/blob.h"
 #include "compiler/glsl/glsl_parser_extras.h"
 #include "compiler/glsl/ir_optimization.h"
 #include "compiler/glsl/program.h"
@@ -47,6 +48,8 @@
 #include "pipe/p_screen.h"
 #include "tgsi/tgsi_ureg.h"
 #include "tgsi/tgsi_info.h"
+#include "util/disk_cache.h"
+#include "util/mesa-sha1.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "st_program.h"
@@ -6999,6 +7002,264 @@ has_unsupported_control_flow(exec_list *ir,
    return visitor.unsupported;
 }
 
+/**
+ * Copy the stream_output state of a shader out of a cache blob.
+ */
+static void
+read_stream_out_from_cache(struct blob_reader *blob_reader,
+                           struct pipe_shader_state *tgsi)
+{
+   blob_copy_bytes(blob_reader, (uint8_t *) &tgsi->stream_output,
+                   sizeof(tgsi->stream_output));
+}
+
+/**
+ * Read a token count followed by that many TGSI tokens from a cache blob.
+ *
+ * The tokens are copied into a newly MALLOC'ed array which is handed back
+ * through \p tokens.
+ */
+static void
+read_tgsi_from_cache(struct blob_reader *blob_reader,
+                     const struct tgsi_token **tokens)
+{
+   uint32_t num_tokens = blob_read_uint32(blob_reader);
+   unsigned tokens_size = num_tokens * sizeof(struct tgsi_token);
+   *tokens = (const tgsi_token*) MALLOC(tokens_size);
+   blob_copy_bytes(blob_reader, (uint8_t *) *tokens, tokens_size);
+}
+
+/**
+ * Rebuild \p prog from its GLSL source when its TGSI cannot be taken from
+ * the cache.
+ *
+ * Every attached shader is recompiled, the program is flagged as a cache
+ * fallback and then relinked.
+ */
+static void
+fallback_to_full_compile(struct gl_context *ctx,
+                         struct gl_shader_program *prog)
+{
+   for (unsigned i = 0; i < prog->NumShaders; i++) {
+      _mesa_glsl_compile_shader(ctx, prog->Shaders[i], false, false, true);
+   }
+
+   prog->data->cache_fallback = true;
+   _mesa_glsl_link_shader(ctx, prog);
+}
+
+/**
+ * Fill in the state tracker program of every linked stage of \p prog with
+ * the TGSI stored in the on-disk shader cache.
+ *
+ * If the item for a stage is missing or malformed, the whole program is
+ * rebuilt from source instead and the function returns early.
+ */
+static void
+load_tgsi_from_disk_cache(struct gl_context *ctx,
+                          struct gl_shader_program *prog)
+{
+   unsigned char sha1[20];
+   char sha1_buf[41];
+   struct st_context *st = st_context(ctx);
+
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (prog->_LinkedShaders[i] == NULL)
+         continue;
+
+      /* The cache key is the sha1 of "tsgi_tokens <program sha1> <stage>",
+       * matching what cache_tgsi() hashes when it stores the item.
+       */
+      char *buf = ralloc_strdup(NULL, "tsgi_tokens ");
+      _mesa_sha1_format(sha1_buf,
+                        prog->_LinkedShaders[i]->Program->sh.data->sha1);
+      ralloc_strcat(&buf, sha1_buf);
+
+      struct gl_program *glprog = prog->_LinkedShaders[i]->Program;
+      switch (glprog->info.stage) {
+      case MESA_SHADER_VERTEX:
+         ralloc_strcat(&buf, " vs");
+         _mesa_sha1_compute(buf, strlen(buf), sha1);
+         break;
+      case MESA_SHADER_TESS_EVAL:
+         ralloc_strcat(&buf, " tes");
+         _mesa_sha1_compute(buf, strlen(buf), sha1);
+         break;
+      case MESA_SHADER_TESS_CTRL:
+         ralloc_strcat(&buf, " tcs");
+         _mesa_sha1_compute(buf, strlen(buf), sha1);
+         break;
+      case MESA_SHADER_GEOMETRY:
+         ralloc_strcat(&buf, " gs");
+         _mesa_sha1_compute(buf, strlen(buf), sha1);
+         break;
+      case MESA_SHADER_FRAGMENT:
+         ralloc_strcat(&buf, " fs");
+         _mesa_sha1_compute(buf, strlen(buf), sha1);
+         break;
+      case MESA_SHADER_COMPUTE:
+         ralloc_strcat(&buf, " cs");
+         _mesa_sha1_compute(buf, strlen(buf), sha1);
+         break;
+
+      default:
+         unreachable("Unsupported stage");
+      }
+
+      size_t size;
+      uint8_t *buffer = (uint8_t *) disk_cache_get(ctx->Cache, sha1, &size);
+      if (buffer) {
+         struct blob_reader blob_reader;
+         blob_reader_init(&blob_reader, buffer, size);
+
+         switch (glprog->info.stage) {
+         case MESA_SHADER_VERTEX: {
+            struct st_vertex_program *stvp =
+               (struct st_vertex_program *) glprog;
+
+            st_release_vp_variants(st, stvp);
+
+            stvp->num_inputs = blob_read_uint32(&blob_reader);
+            blob_copy_bytes(&blob_reader, (uint8_t *) stvp->index_to_input,
+                            sizeof(stvp->index_to_input));
+            blob_copy_bytes(&blob_reader, (uint8_t *) stvp->result_to_output,
+                            sizeof(stvp->result_to_output));
+
+            read_stream_out_from_cache(&blob_reader, &stvp->tgsi);
+            read_tgsi_from_cache(&blob_reader, &stvp->tgsi.tokens);
+
+            if (st->vp == stvp)
+               st->dirty |= ST_NEW_VERTEX_PROGRAM(st, stvp);
+
+            break;
+         }
+         case MESA_SHADER_TESS_EVAL: {
+            struct st_tesseval_program *sttep =
+               (struct st_tesseval_program *) glprog;
+
+            st_release_basic_variants(st, sttep->Base.Target,
+                                      &sttep->variants, &sttep->tgsi);
+
+            read_stream_out_from_cache(&blob_reader, &sttep->tgsi);
+            read_tgsi_from_cache(&blob_reader, &sttep->tgsi.tokens);
+
+            if (st->tep == sttep)
+               st->dirty |= sttep->affected_states;
+
+            break;
+         }
+         case MESA_SHADER_TESS_CTRL: {
+            struct st_tessctrl_program *sttcp =
+               (struct st_tessctrl_program *) glprog;
+
+            st_release_basic_variants(st, sttcp->Base.Target,
+                                      &sttcp->variants, &sttcp->tgsi);
+
+            read_stream_out_from_cache(&blob_reader, &sttcp->tgsi);
+            read_tgsi_from_cache(&blob_reader, &sttcp->tgsi.tokens);
+
+            if (st->tcp == sttcp)
+               st->dirty |= sttcp->affected_states;
+
+            break;
+         }
+         case MESA_SHADER_GEOMETRY: {
+            struct st_geometry_program *stgp =
+               (struct st_geometry_program *) glprog;
+
+            st_release_basic_variants(st, stgp->Base.Target, &stgp->variants,
+                                      &stgp->tgsi);
+
+            read_stream_out_from_cache(&blob_reader, &stgp->tgsi);
+            read_tgsi_from_cache(&blob_reader, &stgp->tgsi.tokens);
+
+            if (st->gp == stgp)
+               st->dirty |= stgp->affected_states;
+
+            break;
+         }
+         case MESA_SHADER_FRAGMENT: {
+            struct st_fragment_program *stfp =
+               (struct st_fragment_program *) glprog;
+
+            st_release_fp_variants(st, stfp);
+
+            read_tgsi_from_cache(&blob_reader, &stfp->tgsi.tokens);
+
+            if (st->fp == stfp)
+               st->dirty |= stfp->affected_states;
+
+            break;
+         }
+         case MESA_SHADER_COMPUTE: {
+            struct st_compute_program *stcp =
+               (struct st_compute_program *) glprog;
+
+            st_release_cp_variants(st, stcp);
+
+            read_tgsi_from_cache(&blob_reader,
+                                 (const tgsi_token**) &stcp->tgsi.prog);
+
+            stcp->tgsi.req_local_mem = stcp->Base.info.cs.shared_size;
+            stcp->tgsi.req_private_mem = 0;
+            stcp->tgsi.req_input_mem = 0;
+
+            if (st->cp == stcp)
+               st->dirty |= stcp->affected_states;
+
+            break;
+         }
+         default:
+            unreachable("Unsupported stage");
+         }
+
+         if (blob_reader.current != blob_reader.end || blob_reader.overrun) {
+            /* Something very bad has gone wrong; discard the item from the
+             * cache and rebuild/link from source.
+             */
+            assert(!"Invalid TGSI shader disk cache item!");
+
+            if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
+               fprintf(stderr, "Error reading program from cache (invalid "
+                       "TGSI cache item)\n");
+            }
+
+            disk_cache_remove(ctx->Cache, sha1);
+            /* Release the cache item and the ralloc'ed key string first. */
+            free(buffer);
+            ralloc_free(buf);
+            fallback_to_full_compile(ctx, prog);
+            return;
+         }
+
+         if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
+            _mesa_sha1_format(sha1_buf, sha1);
+            fprintf(stderr, "%s tgsi_tokens retrieved from cache: %s\n",
+                    _mesa_shader_stage_to_string(i), sha1_buf);
+         }
+
+         set_prog_affected_state_flags(glprog);
+         _mesa_associate_uniform_storage(ctx, prog, glprog->Parameters);
+
+         free(buffer);
+      } else {
+         /* Linking was skipped but no TGSI item exists for this stage, so
+          * no tokens can be loaded for it. Rebuild from source instead.
+          */
+         if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
+            fprintf(stderr, "TGSI cache item not found, falling back to "
+                    "compile\n");
+         }
+
+         ralloc_free(buf);
+         fallback_to_full_compile(ctx, prog);
+         return;
+      }
+
+      ralloc_free(buf);
+   }
+}
+
 extern "C" {
 
 /**
@@ -7010,6 +7271,12 @@ extern "C" {
 GLboolean
 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 {
+   /* Return early if we are loading the shader from on-disk cache */
+   if (ctx->Cache && prog->data->LinkStatus == linking_skipped) {
+      load_tgsi_from_disk_cache(ctx, prog);
+      return GL_TRUE;
+   }
+
    struct pipe_screen *pscreen = ctx->st->pipe->screen;
    assert(prog->data->LinkStatus);
 
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index b2d1586..c85055e 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -37,7 +37,10 @@
 #include "program/prog_parameter.h"
 #include "program/prog_print.h"
 #include "program/programopt.h"
+#include "util/disk_cache.h"
+#include "util/mesa-sha1.h"
 
+#include "compiler/glsl/blob.h"
 #include "compiler/nir/nir.h"
 
 #include "pipe/p_context.h"
@@ -223,6 +226,125 @@ st_release_cp_variants(struct st_context *st, struct st_compute_program *stcp)
    }
 }
 
+/**
+ * Append the stream_output state of a shader to a cache blob.
+ */
+static void
+write_stream_out_to_cache(struct blob *blob,
+                          struct pipe_shader_state *tgsi)
+{
+   blob_write_bytes(blob, &tgsi->stream_output,
+                    sizeof(tgsi->stream_output));
+}
+
+/**
+ * Append the token count and the TGSI tokens to \p blob, then store the
+ * finished blob in the on-disk cache under \p sha1.
+ */
+static void
+write_tgsi_to_cache(struct blob *blob, struct pipe_shader_state *tgsi,
+                    struct st_context *st, unsigned char *sha1,
+                    unsigned num_tokens)
+{
+   blob_write_uint32(blob, num_tokens);
+   blob_write_bytes(blob, tgsi->tokens,
+                    num_tokens * sizeof(struct tgsi_token));
+
+   disk_cache_put(st->ctx->Cache, sha1, blob->data, blob->size);
+}
+
+/**
+ * Store tgsi and any other required state in on-disk shader cache.
+ *
+ * \param out_state  shader state holding the tokens; unused for vertex and
+ *                   fragment programs, which store their own tgsi member
+ * \param num_tokens number of tokens in the token array being stored
+ */
+static void
+cache_tgsi(struct st_context *st, struct gl_program *prog,
+           struct pipe_shader_state *out_state, unsigned num_tokens)
+{
+   if (!st->ctx->Cache)
+      return;
+
+   /* Exit early when we are dealing with a ff shader with no source file to
+    * generate a source from. The check assumes the sha1 of such a program is
+    * still all zeros, so compare the whole digest: a real sha1 may well
+    * begin with a zero byte.
+    */
+   static const unsigned char zero[sizeof(prog->sh.data->sha1)] = {0};
+   if (memcmp(prog->sh.data->sha1, zero, sizeof(zero)) == 0)
+      return;
+
+   unsigned char sha1[20];
+   char sha1_buf[41];
+   char *buf = ralloc_strdup(NULL, "tsgi_tokens ");
+   struct blob *blob = blob_create(NULL);
+
+   _mesa_sha1_format(sha1_buf, prog->sh.data->sha1);
+   ralloc_strcat(&buf, sha1_buf);
+
+   switch (prog->info.stage) {
+   case MESA_SHADER_VERTEX: {
+      ralloc_strcat(&buf, " vs");
+      _mesa_sha1_compute(buf, strlen(buf), sha1);
+
+      struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
+
+      blob_write_uint32(blob, stvp->num_inputs);
+      blob_write_bytes(blob, stvp->index_to_input,
+                       sizeof(stvp->index_to_input));
+      blob_write_bytes(blob, stvp->result_to_output,
+                       sizeof(stvp->result_to_output));
+
+      write_stream_out_to_cache(blob, &stvp->tgsi);
+      write_tgsi_to_cache(blob, &stvp->tgsi, st, sha1, num_tokens);
+      break;
+   }
+   case MESA_SHADER_TESS_EVAL:
+      ralloc_strcat(&buf, " tes");
+      _mesa_sha1_compute(buf, strlen(buf), sha1);
+      write_stream_out_to_cache(blob, out_state);
+      write_tgsi_to_cache(blob, out_state, st, sha1, num_tokens);
+      break;
+   case MESA_SHADER_TESS_CTRL:
+      ralloc_strcat(&buf, " tcs");
+      _mesa_sha1_compute(buf, strlen(buf), sha1);
+      write_stream_out_to_cache(blob, out_state);
+      write_tgsi_to_cache(blob, out_state, st, sha1, num_tokens);
+      break;
+   case MESA_SHADER_GEOMETRY:
+      ralloc_strcat(&buf, " gs");
+      _mesa_sha1_compute(buf, strlen(buf), sha1);
+      write_stream_out_to_cache(blob, out_state);
+      write_tgsi_to_cache(blob, out_state, st, sha1, num_tokens);
+      break;
+   case MESA_SHADER_FRAGMENT: {
+      ralloc_strcat(&buf, " fs");
+      _mesa_sha1_compute(buf, strlen(buf), sha1);
+
+      struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
+      write_tgsi_to_cache(blob, &stfp->tgsi, st, sha1, num_tokens);
+      break;
+   }
+   case MESA_SHADER_COMPUTE:
+      ralloc_strcat(&buf, " cs");
+      _mesa_sha1_compute(buf, strlen(buf), sha1);
+      write_tgsi_to_cache(blob, out_state, st, sha1, num_tokens);
+      break;
+   default:
+      unreachable("Unsupported stage");
+   }
+
+   if (st->ctx->_Shader->Flags & GLSL_CACHE_INFO) {
+      _mesa_sha1_format(sha1_buf, sha1);
+      fprintf(stderr, "putting %s tgsi_tokens in cache: %s\n",
+              _mesa_shader_stage_to_string(prog->info.stage), sha1_buf);
+   }
+
+   ralloc_free(blob);
+   ralloc_free(buf);
+}
 
 /**
  * Translate a vertex program.
@@ -442,7 +564,6 @@ st_translate_vertex_program(struct st_context *st,
                                       &stvp->tgsi.stream_output);
 
       free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
-      stvp->glsl_to_tgsi = NULL;
    } else
       error = st_translate_mesa_program(st->ctx,
                                         PIPE_SHADER_VERTEX,
@@ -467,8 +588,15 @@ st_translate_vertex_program(struct st_context *st,
       return false;
    }
 
-   stvp->tgsi.tokens = ureg_get_tokens(ureg, NULL);
+   unsigned num_tokens;
+   stvp->tgsi.tokens = ureg_get_tokens(ureg, &num_tokens);
    ureg_destroy(ureg);
+
+   if (stvp->glsl_to_tgsi) {
+      stvp->glsl_to_tgsi = NULL;
+      cache_tgsi(st, &stvp->Base, NULL, num_tokens);
+   }
+
    return stvp->tgsi.tokens != NULL;
 }
 
@@ -890,7 +1018,6 @@ st_translate_fragment_program(struct st_context *st,
                            fs_output_semantic_index);
 
       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
-      stfp->glsl_to_tgsi = NULL;
    } else if (stfp->ati_fs)
       st_translate_atifs_program(ureg,
                                  stfp->ati_fs,
@@ -923,8 +1050,15 @@ st_translate_fragment_program(struct st_context *st,
                                 fs_output_semantic_name,
                                 fs_output_semantic_index);
 
-   stfp->tgsi.tokens = ureg_get_tokens(ureg, NULL);
+   unsigned num_tokens;
+   stfp->tgsi.tokens = ureg_get_tokens(ureg, &num_tokens);
    ureg_destroy(ureg);
+
+   if (stfp->glsl_to_tgsi) {
+      stfp->glsl_to_tgsi = NULL;
+      cache_tgsi(st, &stfp->Base, NULL, num_tokens);
+   }
+
    return stfp->tgsi.tokens != NULL;
 }
 
@@ -1459,13 +1593,16 @@ st_translate_program_common(struct st_context *st,
                         output_semantic_name,
                         output_semantic_index);
 
-   out_state->tokens = ureg_get_tokens(ureg, NULL);
+   unsigned num_tokens;
+   out_state->tokens = ureg_get_tokens(ureg, &num_tokens);
    ureg_destroy(ureg);
 
    st_translate_stream_output_info(glsl_to_tgsi,
                                    outputMapping,
                                    &out_state->stream_output);
 
+   cache_tgsi(st, prog, out_state, num_tokens);
+
    if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) {
       _mesa_print_program(prog);
       debug_printf("\n");
-- 
2.9.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev