Changes in v4: - Move code to nv50_ir_dump.cpp - Dump headers of nvc0 programs - Use CRC-32 instead of a truncated SHA-1 - Set prog->maxGPR to targ->getFileSize() - 1 and set prog->tlsSize - Don't compile the program if a replacement is offered. This has the consequence that a program is not dumped when it's replaced. Changes in v3: - Fix messed-up patch description and diff - Use the checksum of the TGSI instead of the binary if possible Changes in v2: - Move "#ifdef DEBUG" from above dumpProgram to above createDumpFilename
The NV50_PROG_DUMP environment variable specifies an (already created) directory to dump shader binaries, headers and TGSI code. The NV50_PROG_REPLACE environment variable specifies an (already created) directory that is searched to find replacement binaries and headers. This is all much like MESA_SHADER_DUMP_PATH and MESA_SHADER_READ_PATH except using CRC-32 checksums instead of program IDs and chip-specific binaries instead of GLSL. Signed-off-by: Rhys Perry <pendingchao...@gmail.com> --- src/gallium/auxiliary/tgsi/tgsi_util.h | 1 + src/gallium/drivers/nouveau/Makefile.sources | 2 + src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 40 +++-- .../drivers/nouveau/codegen/nv50_ir_driver.h | 1 + .../drivers/nouveau/codegen/nv50_ir_dump.cpp | 171 +++++++++++++++++++++ src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h | 70 +++++++++ src/gallium/drivers/nouveau/meson.build | 2 + src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 138 +++++++++++------ 8 files changed, 360 insertions(+), 65 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_dump.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h index 686b90f467..81cf955d8f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -28,6 +28,7 @@ #ifndef TGSI_UTIL_H #define TGSI_UTIL_H +#include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" #if defined __cplusplus diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 65f08c7d8d..e867221818 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -114,6 +114,8 @@ NV50_CODEGEN_SOURCES := \ codegen/nv50_ir_build_util.cpp \ codegen/nv50_ir_build_util.h \ codegen/nv50_ir_driver.h \ + codegen/nv50_ir_dump.cpp \ + codegen/nv50_ir_dump.h \ codegen/nv50_ir_emit_nv50.cpp \ 
codegen/nv50_ir_from_tgsi.cpp \ codegen/nv50_ir_graph.cpp \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index c987da9908..b1782bb4f2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -23,6 +23,7 @@ #include "codegen/nv50_ir.h" #include "codegen/nv50_ir_target.h" #include "codegen/nv50_ir_driver.h" +#include "codegen/nv50_ir_dump.h" extern "C" { #include "nouveau_debug.h" @@ -1244,30 +1245,35 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) prog->print(); targ->parseDriverInfo(info); - prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); - prog->convertToSSA(); + if (!nv50_ir::replaceProgramCode(prog)) { + prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); - if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) - prog->print(); + prog->convertToSSA(); - prog->optimizeSSA(info->optLevel); - prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA); + if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) + prog->print(); - if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) - prog->print(); + prog->optimizeSSA(info->optLevel); + prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA); - if (!prog->registerAllocation()) { - ret = -4; - goto out; - } - prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA); + if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) + prog->print(); - prog->optimizePostRA(info->optLevel); + if (!prog->registerAllocation()) { + ret = -4; + goto out; + } + prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA); - if (!prog->emitBinary(info)) { - ret = -5; - goto out; + prog->optimizePostRA(info->optLevel); + + if (!prog->emitBinary(info)) { + ret = -5; + goto out; + } + + nv50_ir::dumpProgramCodeAndIR(prog); } out: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 3d0782f86b..9c23c74628 100644 
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -93,6 +93,7 @@ struct nv50_ir_prog_info uint32_t codeSize; uint32_t instructions; uint8_t sourceRep; /* PIPE_SHADER_IR_* */ + uint32_t sourceHash; /* CRC-32 */ const void *source; void *relocData; void *fixupData; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.cpp new file mode 100644 index 0000000000..2d421e8e03 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.cpp @@ -0,0 +1,171 @@ +#include "nv50_ir_dump.h" + +#include "codegen/nv50_ir_target.h" +#include "tgsi/tgsi_dump.h" +#include "util/crc32.h" + +#ifdef DEBUG +static char * +createDumpFilename(const char *dir, const nv50_ir_prog_info *info, const char *ext) +{ + char* fname = (char*)MALLOC(strlen(dir) + 13 + strlen(ext)); + if (dir[0] && dir[strlen(dir) - 1] == '/') + sprintf(fname, "%s%.8x", dir, info->bin.sourceHash); + else + sprintf(fname, "%s/%.8x", dir, info->bin.sourceHash); + + switch (info->type) { + case PIPE_SHADER_VERTEX: + strcat(fname, ".vs"); + break; + case PIPE_SHADER_TESS_CTRL: + strcat(fname, ".tcs"); + break; + case PIPE_SHADER_TESS_EVAL: + strcat(fname, ".tes"); + break; + case PIPE_SHADER_GEOMETRY: + strcat(fname, ".gs"); + break; + case PIPE_SHADER_FRAGMENT: + strcat(fname, ".fs"); + break; + case PIPE_SHADER_COMPUTE: + strcat(fname, ".cs"); + break; + default: + assert(0); + break; + } + + strcat(fname, ext); + + return fname; +} + +extern "C" { + +void +nv50_ir_create_source_hash(nv50_ir_prog_info *info) +{ + switch (info->bin.sourceRep) { + case PIPE_SHADER_IR_TGSI: { + const tgsi_header* header = (const tgsi_header*)info->bin.source; + unsigned size = (header->HeaderSize + header->BodySize) * sizeof(tgsi_token); + info->bin.sourceHash = util_hash_crc32(info->bin.source, size); + break; + } + default: + assert(0); + break; + } +} + +FILE * +nv50_ir_begin_dump(const 
nv50_ir_prog_info *info, const char *what, + const char *ext, bool binary) +{ + const char *dump_dir = debug_get_option("NV50_PROG_DUMP", NULL); + if (!dump_dir) + return NULL; + + char* fname = createDumpFilename(dump_dir, info, ext); + + FILE *fp = fopen(fname, binary ? "wb" : "w"); + if (!fp) { + INFO("Failed to dump %s of a program to %s\n", what, fname); + return NULL; + } + + INFO("Dumping %s of a program to %s\n", what, fname); + + FREE(fname); + + return fp; +} + +bool +nv50_ir_get_replacement(const nv50_ir_prog_info *info, const char *what, + const char *ext, size_t *size, void **data) +{ + const char *replace_dir = debug_get_option("NV50_PROG_REPLACE", NULL); + if (!replace_dir) + return false; + + char* fname = createDumpFilename(replace_dir, info, ext); + + FILE *fp = fopen(fname, "rb"); + if (!fp) + return false; + + *size = 0; + *data = MALLOC(65536); + + size_t bufSize = 65536; + size_t read = 0; + while ((read = fread(*data, 1, bufSize - *size, fp))) { + *size += read; + if (*size == bufSize) { + *data = REALLOC(*data, bufSize, bufSize * 2); + bufSize *= 2; + } + } + + INFO("Replacing code of a program with that from %s\n", fname); + + FREE(fname); + + return true; +} + +} + +namespace nv50_ir { + +void +dumpProgramCodeAndIR(const nv50_ir::Program *prog) +{ + FILE *fp = nv50_ir_begin_dump(prog->driver, "code", ".bin", true); + if (fp) { + fwrite(prog->code, prog->binSize, 1, fp); + fclose(fp); + } + + switch (prog->driver->bin.sourceRep) { + case PIPE_SHADER_IR_TGSI: { + const tgsi_token *tokens = (const tgsi_token *)prog->driver->bin.source; + fp = nv50_ir_begin_dump(prog->driver, "tgsi", ".tgsi.txt", false); + if (fp) { + tgsi_dump_to_file(tokens, 0, fp); + fclose(fp); + } + break; + } + default: + assert(0); + break; + } +} + +bool +replaceProgramCode(nv50_ir::Program *prog) +{ + const nv50_ir::Target* targ = prog->getTarget(); + + size_t size; + void *data; + if (!nv50_ir_get_replacement(prog->driver, "code", ".bin", &size, &data)) + return 
false; + + FREE(prog->code); + prog->code = (uint32_t*)data; + prog->binSize = size; + prog->maxGPR = targ->getFileSize(nv50_ir::FILE_GPR) - 1; + prog->tlsSize = targ->getFileSize(nv50_ir::FILE_MEMORY_LOCAL); + + return true; +} + +} // namespace nv50_ir + +#endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h new file mode 100644 index 0000000000..80f4f7e7d5 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h @@ -0,0 +1,70 @@ +#ifndef __NV50_IR_DUMP__ +#define __NV50_IR_DUMP__ + +#include <stdio.h> +#include "util/macros.h" /* For ALWAYS_INLINE */ +#include "nv50_ir_driver.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef DEBUG +void +nv50_ir_create_source_hash(struct nv50_ir_prog_info *info); + +FILE * +nv50_ir_begin_dump(const struct nv50_ir_prog_info *info, const char *what, + const char *ext, bool binary); + +bool +nv50_ir_get_replacement(const struct nv50_ir_prog_info *info, const char *what, + const char *ext, size_t *size, void **data); +#else +ALWAYS_INLINE void +nv50_ir_create_source_hash(struct nv50_ir_prog_info *info) { + info->bin.sourceHash = 0; +} + +ALWAYS_INLINE FILE * +nv50_ir_begin_dump(const struct nv50_ir_prog_info *info, const char *what, + const char *ext, bool binary) +{ + return NULL; +} + +ALWAYS_INLINE bool +nv50_ir_get_replacement(const struct nv50_ir_prog_info *info, const char *what, + const char *ext, size_t *size, void **data) +{ + return false; +} +#endif + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus +#include "nv50_ir.h" + +namespace nv50_ir { + +#ifdef DEBUG +void +dumpProgramCodeAndIR(const Program *prog); + +bool +replaceProgramCode(Program *prog); +#else +ALWAYS_INLINE void +dumpProgramCodeAndIR(Program *prog) {} + +ALWAYS_INLINE bool +replaceProgramCode(Program *prog) {return false;} +#endif + +} // namespace nv50_ir +#endif + +#endif diff --git a/src/gallium/drivers/nouveau/meson.build 
b/src/gallium/drivers/nouveau/meson.build index 242ee0e000..f7774326f2 100644 --- a/src/gallium/drivers/nouveau/meson.build +++ b/src/gallium/drivers/nouveau/meson.build @@ -128,6 +128,8 @@ files_libnouveau = files( 'codegen/nv50_ir_build_util.cpp', 'codegen/nv50_ir_build_util.h', 'codegen/nv50_ir_driver.h', + 'codegen/nv50_ir_dump.cpp', + 'codegen/nv50_ir_dump.h', 'codegen/nv50_ir_emit_nv50.cpp', 'codegen/nv50_ir_from_tgsi.cpp', 'codegen/nv50_ir_graph.cpp', diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 9520d984bb..06d989a3f8 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -27,6 +27,7 @@ #include "nvc0/nvc0_context.h" #include "codegen/nv50_ir_driver.h" +#include "codegen/nv50_ir_dump.h" #include "nvc0/nve4_compute.h" /* NOTE: Using a[0x270] in FP may cause an error even if we're using less than @@ -506,6 +507,64 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) return 0; } +static int +nvc0_program_create_header(struct nvc0_program *prog, + struct nv50_ir_prog_info *info) +{ + int ret = 0; + switch (prog->type) { + case PIPE_SHADER_VERTEX: + ret = nvc0_vp_gen_header(prog, info); + break; + case PIPE_SHADER_TESS_CTRL: + ret = nvc0_tcp_gen_header(prog, info); + break; + case PIPE_SHADER_TESS_EVAL: + ret = nvc0_tep_gen_header(prog, info); + break; + case PIPE_SHADER_GEOMETRY: + ret = nvc0_gp_gen_header(prog, info); + break; + case PIPE_SHADER_FRAGMENT: + ret = nvc0_fp_gen_header(prog, info); + break; + case PIPE_SHADER_COMPUTE: + prog->cp.syms = info->bin.syms; + prog->cp.num_syms = info->bin.numSyms; + break; + default: + ret = -1; + NOUVEAU_ERR("unknown program type: %u\n", prog->type); + break; + } + if (ret) + return ret; + + if (info->bin.tlsSpace) { + assert(info->bin.tlsSpace < (1 << 24)); + prog->hdr[0] |= 1 << 26; + prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */ + 
prog->need_tls = true; + } + /* TODO: factor 2 only needed where joinat/precont is used, + * and we only have to count non-uniform branches + */ + /* + if ((info->maxCFDepth * 2) > 16) { + prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200; + prog->need_tls = true; + } + */ + if (info->io.globalAccess) + prog->hdr[0] |= 1 << 26; + if (info->io.globalAccess & 0x2) + prog->hdr[0] |= 1 << 16; + if (info->io.fp64) + prog->hdr[0] |= 1 << 27; + + return 0; +} + static struct nvc0_transform_feedback_state * nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info, const struct pipe_stream_output_info *pso) @@ -565,6 +624,30 @@ nvc0_program_dump(struct nvc0_program *prog) } #endif +static void +nvc0_dump_header(struct nvc0_program *prog, struct nv50_ir_prog_info *info) +{ + FILE *fp = nv50_ir_begin_dump(info, "header", ".hdr", true); + if (fp) { + fwrite(prog->hdr, sizeof(prog->hdr), 1, fp); + fclose(fp); + } +} + +static bool +nvc0_replace_header(struct nvc0_program *prog, struct nv50_ir_prog_info *info) +{ + size_t size; + void *data; + if (!nv50_ir_get_replacement(info, "header", ".hdr", &size, &data)) + return false; + + memcpy(prog->hdr, data, MIN2(size, 20)); + FREE(data); + + return true; +} + bool nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, struct pipe_debug_callback *debug) @@ -618,6 +701,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->assignSlots = nvc0_program_assign_varying_slots; + nv50_ir_create_source_hash(info); + ret = nv50_ir_generate_code(info); if (ret) { NOUVEAU_ERR("shader translation failed: %i\n", ret); @@ -641,55 +726,12 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */ prog->vp.edgeflag = info->io.edgeFlagIn; - switch (prog->type) { - case PIPE_SHADER_VERTEX: - ret = nvc0_vp_gen_header(prog, info); - break; - case PIPE_SHADER_TESS_CTRL: - ret = nvc0_tcp_gen_header(prog, info); - break; - case 
PIPE_SHADER_TESS_EVAL: - ret = nvc0_tep_gen_header(prog, info); - break; - case PIPE_SHADER_GEOMETRY: - ret = nvc0_gp_gen_header(prog, info); - break; - case PIPE_SHADER_FRAGMENT: - ret = nvc0_fp_gen_header(prog, info); - break; - case PIPE_SHADER_COMPUTE: - prog->cp.syms = info->bin.syms; - prog->cp.num_syms = info->bin.numSyms; - break; - default: - ret = -1; - NOUVEAU_ERR("unknown program type: %u\n", prog->type); - break; - } - if (ret) - goto out; - - if (info->bin.tlsSpace) { - assert(info->bin.tlsSpace < (1 << 24)); - prog->hdr[0] |= 1 << 26; - prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */ - prog->need_tls = true; + if (!nvc0_replace_header(prog, info)) { + ret = nvc0_program_create_header(prog, info); + if (ret) + goto out; + nvc0_dump_header(prog, info); } - /* TODO: factor 2 only needed where joinat/precont is used, - * and we only have to count non-uniform branches - */ - /* - if ((info->maxCFDepth * 2) > 16) { - prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200; - prog->need_tls = true; - } - */ - if (info->io.globalAccess) - prog->hdr[0] |= 1 << 26; - if (info->io.globalAccess & 0x2) - prog->hdr[0] |= 1 << 16; - if (info->io.fp64) - prog->hdr[0] |= 1 << 27; if (prog->pipe.stream_output.num_outputs) prog->tfb = nvc0_program_create_tfb_state(info, -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev