[Mesa-dev] [PATCH v3 3/8] nv50/ir: Add nv50_ir_prog_info_out serialize and deserialize
Adds functions for serializing and deserializing nv50_ir_prog_info_out structure, which are needed for shader caching. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 44 + .../nouveau/codegen/nv50_ir_emit_gk110.cpp| 14 +- .../nouveau/codegen/nv50_ir_emit_gm107.cpp| 14 +- .../nouveau/codegen/nv50_ir_emit_nv50.cpp | 6 +- .../nouveau/codegen/nv50_ir_emit_nvc0.cpp | 14 +- .../nouveau/codegen/nv50_ir_serialize.cpp | 185 ++ src/gallium/drivers/nouveau/meson.build | 1 + 7 files changed, 254 insertions(+), 24 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index dab1ce030cb..591aa8f57e8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -25,6 +25,7 @@ #include "pipe/p_shader_tokens.h" +#include "util/blob.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" @@ -242,6 +243,49 @@ nv50_ir_apply_fixups(void *fixupData, uint32_t *code, extern void nv50_ir_get_target_library(uint32_t chipset, const uint32_t **code, uint32_t *size); + +#ifdef __cplusplus +namespace nv50_ir +{ + class FixupEntry; + class FixupData; + + void + gk110_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gm107_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + nv50_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, +const nv50_ir::FixupData& data); + void + nvc0_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, +const nv50_ir::FixupData& data); + void + gk110_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gm107_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + 
nvc0_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + +} +#endif + +/* Serialize a nv50_ir_prog_info_out structure and save it into blob */ +extern bool MUST_CHECK +nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); + +/* Deserialize from data and save into a nv50_ir_prog_info_out structure + * using a pointer. Size is a total size of the serialized data. + * Offset points to where info_out in data is located. */ +extern bool MUST_CHECK +nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset, + struct nv50_ir_prog_info_out *); + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 2118c3153f7..e651d7fdcb0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1209,8 +1209,8 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i) } } -static void -selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) { int loc = entry->loc; if (data.force_persample_interp) @@ -1227,7 +1227,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i) code[1] |= 1 << 13; if (i->subOp == 1) { - addInterp(0, 0, selpFlip); + addInterp(0, 0, gk110_selpFlip); } } @@ -2042,8 +2042,8 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i) code[1] |= (i->ipa & 0xc) << (19 - 2); } -static void -interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const FixupData& data) { int ipa = entry->ipa; int reg = entry->reg; @@ -2078,10 +2078,10 @@ CodeEmitterGK110::emitINTERP(const Instruction *i) if (i->op == OP_PINTERP) { srcId(i->src(1), 23); - addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); + addInterp(i->ipa, 
SDATA(i->src(1)).id, gk110_interpApply); } else { code[0] |= 0xff << 23; - addInterp(i->ipa, 0xff, interpApply); + addInterp(i->ipa, 0xff, gk110_interpApply); } srcId(i->src(0).getIndirect(0), 10); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index e244bd0d610..4970f14cb33 100644 --- a/src/gallium/drivers/nouveau/code
[Mesa-dev] [PATCH v3 8/8] nv50: Add shader disk caching
Adds shader disk caching for nv50 so that shaders do not have to be recompiled every time. Shaders are saved into the disk_shader_cache from the nv50_screen structure. The input nv50_ir_prog_info is serialized to compute the hash key and also to do a byte compare between the original nv50_ir_prog_info and the one saved in the cache. If the keys match and the byte compare also reports that they are equal, the shaders are the same, and the compiled nv50_ir_prog_info_out from the cache can be used instead of compiling the input info. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/nv50/nv50_program.c | 283 +++--- .../drivers/nouveau/nv50/nv50_program.h | 2 + .../drivers/nouveau/nv50/nv50_shader_state.c | 4 +- src/gallium/drivers/nouveau/nv50/nv50_state.c | 1 + 4 files changed, 54 insertions(+), 236 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index f4f2e951fd5..a03d5b9f6d0 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -37,108 +37,6 @@ bitcount4(const uint32_t val) return cnt[val & 0xf]; } -static int -nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out *info) -{ - struct nv50_program *prog = (struct nv50_program *)info->driverPriv; - unsigned i, n, c; - - n = 0; - for (i = 0; i < info->numInputs; ++i) { - prog->in[i].id = i; - prog->in[i].sn = info->in[i].sn; - prog->in[i].si = info->in[i].si; - prog->in[i].hw = n; - prog->in[i].mask = info->in[i].mask; - - prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32); - - for (c = 0; c < 4; ++c) - if (info->in[i].mask & (1 << c)) -info->in[i].slot[c] = n++; - - if (info->in[i].sn == TGSI_SEMANTIC_PRIMID) - prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID; - } - prog->in_nr = info->numInputs; - - for (i = 0; i < info->numSysVals; ++i) { - switch (info->sv[i].sn) { - case TGSI_SEMANTIC_INSTANCEID: - prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID; - continue; - 
case TGSI_SEMANTIC_VERTEXID: - prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; - prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; - continue; - default: - break; - } - } - - /* -* Corner case: VP has no inputs, but we will still need to submit data to -* draw it. HW will shout at us and won't draw anything if we don't enable -* any input, so let's just pretend it's the first one. -*/ - if (prog->vp.attrs[0] == 0 && - prog->vp.attrs[1] == 0 && - prog->vp.attrs[2] == 0) - prog->vp.attrs[0] |= 0xf; - - /* VertexID before InstanceID */ - if (info->io.vertexId < info->numSysVals) - info->sv[info->io.vertexId].slot[0] = n++; - if (info->io.instanceId < info->numSysVals) - info->sv[info->io.instanceId].slot[0] = n++; - - n = 0; - for (i = 0; i < info->numOutputs; ++i) { - switch (info->out[i].sn) { - case TGSI_SEMANTIC_PSIZE: - prog->vp.psiz = i; - break; - case TGSI_SEMANTIC_CLIPDIST: - prog->vp.clpd[info->out[i].si] = n; - break; - case TGSI_SEMANTIC_EDGEFLAG: - prog->vp.edgeflag = i; - break; - case TGSI_SEMANTIC_BCOLOR: - prog->vp.bfc[info->out[i].si] = i; - break; - case TGSI_SEMANTIC_LAYER: - prog->gp.has_layer = true; - prog->gp.layerid = n; - break; - case TGSI_SEMANTIC_VIEWPORT_INDEX: - prog->gp.has_viewport = true; - prog->gp.viewportid = n; - break; - default: - break; - } - prog->out[i].id = i; - prog->out[i].sn = info->out[i].sn; - prog->out[i].si = info->out[i].si; - prog->out[i].hw = n; - prog->out[i].mask = info->out[i].mask; - - for (c = 0; c < 4; ++c) - if (info->out[i].mask & (1 << c)) -info->out[i].slot[c] = n++; - } - prog->out_nr = info->numOutputs; - prog->max_out = n; - if (!prog->max_out) - prog->max_out = 1; - - if (prog->vp.psiz < info->numOutputs) - prog->vp.psiz = prog->out[prog->vp.psiz].hw; - - return 0; -} - static int nv50_vertprog_assign_slots_info(struct nv50_ir_prog_info_out *info) { @@ -263,115 +161,6 @@ nv50_vertprog_assign_slots_prog(struct nv50_ir_prog_info_out *info) return 0; } 
-static int -nv50_fragprog_assign_slots(struct nv50_ir_prog_info_out *info) -{ - struct nv50_program *prog = (struct nv50_program *)info->driverPriv; - unsigned i, n, m, c; - unsigned nvary; - unsi
[Mesa-dev] [PATCH v3 7/8] nv50: Add separate functions for varying bits
This separation will be needed for shader disk caching. The reason is that when shaders are loaded from the cache, the data in the info structure is already loaded. That means the varying bits for info are needed only when compiling shaders, not when loading from the cache. The varying bits for prog are needed in both cases. Unfortunately, I don't know how most of the code works; I have separated this manually, only by looking at the original code. That means that this patch is experimental. Together with the following commit it works (there seem to be no regressions at all in VK-GL-CTS [openglcts/data/mustpass/gl/khronos_mustpass/4.6.1.x/gl33-master.txt] and all benchmarks behaved normally). Unfortunately, I cannot test with Piglit because of technical problems, so some work might still be needed. I am mainly asking for help with the function names, with looking for bugs, and with pointing out useless code. I will be glad for every review. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/nv50/nv50_program.c | 344 ++ 1 file changed, 344 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index e36b8a0f8cc..f4f2e951fd5 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -139,6 +139,130 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out *info) return 0; } +static int +nv50_vertprog_assign_slots_info(struct nv50_ir_prog_info_out *info) +{ + unsigned i, n, c; + + n = 0; + for (i = 0; i < info->numInputs; ++i) { + for (c = 0; c < 4; ++c) + if (info->in[i].mask & (1 << c)) +info->in[i].slot[c] = n++; + } + + /* VertexID before InstanceID */ + if (info->io.vertexId < info->numSysVals) + info->sv[info->io.vertexId].slot[0] = n++; + if (info->io.instanceId < info->numSysVals) + info->sv[info->io.instanceId].slot[0] = n++; + + n = 0; + for (i = 0; i < info->numOutputs; ++i) { + for (c = 0; c < 4; ++c) + if (info->out[i].mask & (1 << c)) 
+info->out[i].slot[c] = n++; + } + + return 0; +} + +static int +nv50_vertprog_assign_slots_prog(struct nv50_ir_prog_info_out *info) +{ + struct nv50_program *prog = (struct nv50_program *)info->driverPriv; + unsigned i, n, c; + + n = 0; + for (i = 0; i < info->numInputs; ++i) { + prog->in[i].id = i; + prog->in[i].sn = info->in[i].sn; + prog->in[i].si = info->in[i].si; + prog->in[i].hw = n; + prog->in[i].mask = info->in[i].mask; + + prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32); + + for (c = 0; c < 4; ++c) + if (info->in[i].mask & (1 << c)) +n++; + + if (info->in[i].sn == TGSI_SEMANTIC_PRIMID) + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID; + } + prog->in_nr = info->numInputs; + + for (i = 0; i < info->numSysVals; ++i) { + switch (info->sv[i].sn) { + case TGSI_SEMANTIC_INSTANCEID: + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID; + continue; + case TGSI_SEMANTIC_VERTEXID: + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; + continue; + default: + break; + } + } + + /* +* Corner case: VP has no inputs, but we will still need to submit data to +* draw it. HW will shout at us and won't draw anything if we don't enable +* any input, so let's just pretend it's the first one. 
+*/ + if (prog->vp.attrs[0] == 0 && + prog->vp.attrs[1] == 0 && + prog->vp.attrs[2] == 0) + prog->vp.attrs[0] |= 0xf; + + n = 0; + for (i = 0; i < info->numOutputs; ++i) { + switch (info->out[i].sn) { + case TGSI_SEMANTIC_PSIZE: + prog->vp.psiz = i; + break; + case TGSI_SEMANTIC_CLIPDIST: + prog->vp.clpd[info->out[i].si] = n; + break; + case TGSI_SEMANTIC_EDGEFLAG: + prog->vp.edgeflag = i; + break; + case TGSI_SEMANTIC_BCOLOR: + prog->vp.bfc[info->out[i].si] = i; + break; + case TGSI_SEMANTIC_LAYER: + prog->gp.has_layer = true; + prog->gp.layerid = n; + break; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + prog->gp.has_viewport = true; + prog->gp.viewportid = n; + break; + default: + break; + } + prog->out[i].id = i; + prog->out[i].sn = info->out[i].sn; + prog->out[i].si = info->out[i].si; + prog->out[i].hw = n; + prog->out[i].mask = info->out[i].mask; + + for (c = 0; c < 4; ++c) +
[Mesa-dev] [PATCH v3 2/8] nv50/ir: add nv50_ir_prog_info_out
From: Karol Herbst Split out the output relevant fields from the nv50_ir_prog_info struct in order to have a cleaner separation between the input and output of the compilation. Signed-off-by: Karol Herbst --- .../drivers/nouveau/codegen/nv50_ir.cpp | 49 ++-- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 9 +- .../drivers/nouveau/codegen/nv50_ir_driver.h | 117 +--- .../nouveau/codegen/nv50_ir_from_common.cpp | 14 +- .../nouveau/codegen/nv50_ir_from_common.h | 3 +- .../nouveau/codegen/nv50_ir_from_nir.cpp | 204 +++--- .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 256 +- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 6 +- .../nouveau/codegen/nv50_ir_target.cpp| 2 +- .../drivers/nouveau/codegen/nv50_ir_target.h | 5 +- .../nouveau/codegen/nv50_ir_target_nv50.cpp | 17 +- .../nouveau/codegen/nv50_ir_target_nv50.h | 3 +- .../drivers/nouveau/nouveau_compiler.c| 9 +- .../drivers/nouveau/nv50/nv50_program.c | 61 +++-- .../drivers/nouveau/nvc0/nvc0_program.c | 87 +++--- 15 files changed, 448 insertions(+), 394 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index c65853578f6..c2c5956874a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1241,15 +1241,18 @@ void Program::releaseValue(Value *value) extern "C" { static void -nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) +nv50_ir_init_prog_info(struct nv50_ir_prog_info *info, + struct nv50_ir_prog_info_out *info_out) { + info_out->target = info->target; + info_out->type = info->type; if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) { - info->prop.tp.domain = PIPE_PRIM_MAX; - info->prop.tp.outputPrim = PIPE_PRIM_MAX; + info_out->prop.tp.domain = PIPE_PRIM_MAX; + info_out->prop.tp.outputPrim = PIPE_PRIM_MAX; } if (info->type == PIPE_SHADER_GEOMETRY) { - info->prop.gp.instanceCount = 1; - info->prop.gp.maxVertices = 1; + info_out->prop.gp.instanceCount 
= 1; + info_out->prop.gp.maxVertices = 1; } if (info->type == PIPE_SHADER_COMPUTE) { info->prop.cp.numThreads[0] = @@ -1257,23 +1260,26 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) info->prop.cp.numThreads[2] = 1; } info->io.pointSize = 0xff; - info->io.instanceId = 0xff; - info->io.vertexId = 0xff; - info->io.edgeFlagIn = 0xff; - info->io.edgeFlagOut = 0xff; - info->io.fragDepth = 0xff; - info->io.sampleMask = 0xff; + info_out->bin.smemSize = info->bin.smemSize; + info_out->io.genUserClip = info->io.genUserClip; + info_out->io.instanceId = 0xff; + info_out->io.vertexId = 0xff; + info_out->io.edgeFlagIn = 0xff; + info_out->io.edgeFlagOut = 0xff; + info_out->io.fragDepth = 0xff; + info_out->io.sampleMask = 0xff; info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff; } int -nv50_ir_generate_code(struct nv50_ir_prog_info *info) +nv50_ir_generate_code(struct nv50_ir_prog_info *info, + struct nv50_ir_prog_info_out *info_out) { int ret = 0; nv50_ir::Program::Type type; - nv50_ir_init_prog_info(info); + nv50_ir_init_prog_info(info, info_out); #define PROG_TYPE_CASE(a, b) \ case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break @@ -1301,15 +1307,16 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) return -1; } prog->driver = info; + prog->driver_out = info_out; prog->dbgFlags = info->dbgFlags; prog->optLevel = info->optLevel; switch (info->bin.sourceRep) { case PIPE_SHADER_IR_NIR: - ret = prog->makeFromNIR(info) ? 0 : -2; + ret = prog->makeFromNIR(info, info_out) ? 0 : -2; break; case PIPE_SHADER_IR_TGSI: - ret = prog->makeFromTGSI(info) ? 0 : -2; + ret = prog->makeFromTGSI(info, info_out) ? 
0 : -2; break; default: ret = -1; @@ -1320,7 +1327,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) prog->print(); - targ->parseDriverInfo(info); + targ->parseDriverInfo(info, info_out); prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); prog->convertToSSA(); @@ -1342,7 +1349,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) prog->optimizePostRA(info->optLevel); - if (!prog->emitBinary(info)) { + if (!prog->emitBinary(info_out)) { ret = -5; goto out; } @@ -1350,10 +1357,10 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) out: INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret); - info->bin.maxGPR = prog->maxGPR; - info->bin.code = prog->code; - info->bin.codeSize = prog->binSize; - info->bin.tlsSpace =
[Mesa-dev] [PATCH v3 5/8] nv50/ir: Add nv50_ir_prog_info serialize
Adds a function for serializing a nv50_ir_prog_info structure, which is needed for shader caching. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 4 ++ .../nouveau/codegen/nv50_ir_serialize.cpp | 42 +++ 2 files changed, 46 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 1bfaa8483ca..d33c6b6b83c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -278,6 +278,10 @@ namespace nv50_ir extern void nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *); +/* Serialize a nv50_ir_prog_info structure and save it into blob */ +extern bool +nv50_ir_prog_info_serialize(struct blob *, struct nv50_ir_prog_info *); + /* Serialize a nv50_ir_prog_info_out structure and save it into blob */ extern bool MUST_CHECK nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp index 52ceb66947f..e9d1d0b3215 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp @@ -16,6 +16,48 @@ enum FixupApplyFunc { FLIP_GM107 }; +extern bool +nv50_ir_prog_info_serialize(struct blob *blob, struct nv50_ir_prog_info *info) +{ + blob_write_uint16(blob, info->target); + blob_write_uint8(blob, info->type); + blob_write_uint8(blob, info->optLevel); + blob_write_uint8(blob, info->dbgFlags); + blob_write_uint8(blob, info->omitLineNum); + blob_write_uint32(blob, info->bin.smemSize); + blob_write_uint16(blob, info->bin.maxOutput); + blob_write_uint8(blob, info->bin.sourceRep); + + switch(info->bin.sourceRep) { + case PIPE_SHADER_IR_TGSI: { + struct tgsi_token *tokens = (struct tgsi_token *)info->bin.source; + unsigned int num_tokens = tgsi_num_tokens(tokens); + + 
blob_write_uint32(blob, num_tokens); + blob_write_bytes(blob, tokens, num_tokens * sizeof(struct tgsi_token)); + break; + } + case PIPE_SHADER_IR_NIR: { + struct nir_shader *nir = (struct nir_shader *)info->bin.source; + nir_serialize(blob, nir, false); + break; + } + default: + assert(!"unhandled info->bin.sourceRep switch case"); + return false; + } + + blob_write_uint16(blob, info->immd.bufSize); + blob_write_bytes(blob, info->immd.buf, info->immd.bufSize * sizeof(*info->immd.buf)); + blob_write_uint16(blob, info->immd.count); + blob_write_bytes(blob, info->immd.data, info->immd.count * sizeof(*info->immd.data)); + blob_write_bytes(blob, info->immd.type, info->immd.count * 16); // for each vec4 (128 bit) + blob_write_bytes(blob, &info->prop, sizeof(info->prop)); + blob_write_bytes(blob, &info->io, sizeof(info->io)); + + return true; +} + extern bool nv50_ir_prog_info_out_serialize(struct blob *blob, struct nv50_ir_prog_info_out *info_out) -- 2.24.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 6/8] nvc0: Add shader disk caching
Adds shader disk caching for nvc0 so that shaders do not have to be recompiled every time. Shaders are saved into the disk_shader_cache from the nvc0_screen structure. The input nv50_ir_prog_info is serialized to compute the hash key and also to do a byte compare between the original nv50_ir_prog_info and the one saved in the cache. If the keys match and the byte compare also reports that they are equal, the shaders are the same, and the compiled nv50_ir_prog_info_out from the cache can be used instead of compiling the input info. This seems to improve loading times significantly; these are the results from running a bunch of shaders: cache off: real 2m58.574s, user 21m34.018s, sys 0m8.055s; cache on, first run: real 3m32.617s, user 24m52.701s, sys 0m20.400s; cache on, second run: real 0m23.745s, user 2m43.566s, sys 0m4.532s. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/nvc0/nvc0_context.h | 1 + .../drivers/nouveau/nvc0/nvc0_program.c | 51 +-- .../drivers/nouveau/nvc0/nvc0_shader_state.c | 3 +- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 2 + 4 files changed, 51 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 8a2a8f2797e..4b83d1afeb4 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -321,6 +321,7 @@ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); /* nvc0_program.c */ bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset, +struct disk_cache *, struct pipe_debug_callback *); bool nvc0_program_upload(struct nvc0_context *, struct nvc0_program *); void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 7f32dc941d6..50430931194 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -24,6 +24,7 @@ #include 
"compiler/nir/nir.h" #include "tgsi/tgsi_ureg.h" +#include "util/blob.h" #include "nvc0/nvc0_context.h" @@ -568,11 +569,18 @@ nvc0_program_dump(struct nvc0_program *prog) bool nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, + struct disk_cache *disk_shader_cache, struct pipe_debug_callback *debug) { + struct blob blob; struct nv50_ir_prog_info *info; struct nv50_ir_prog_info_out info_out = {}; - int ret; + + + + int ret = 0; + cache_key key; + bool shader_found = false; info = CALLOC_STRUCT(nv50_ir_prog_info); if (!info) @@ -632,11 +640,44 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->bin.smemSize = prog->cp.smem_size; info->io.genUserClip = prog->vp.num_ucps; - ret = nv50_ir_generate_code(info, &info_out); - if (ret) { - NOUVEAU_ERR("shader translation failed: %i\n", ret); - goto out; + blob_init(&blob); + + if (disk_shader_cache) { + void *cached_data = NULL; + size_t cached_size; + + nv50_ir_prog_info_serialize(&blob, info); + disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key); + cached_data = disk_cache_get(disk_shader_cache, key, &cached_size); + + if (cached_data && cached_size >= blob.size) { // blob.size is the size of serialized "info" + if (memcmp(cached_data, blob.data, blob.size) == 0) { +shader_found = true; +/* Blob contains only "info". 
In disk cache, "info_out" comes right after it */ +size_t offset = blob.size; +if (!nv50_ir_prog_info_out_deserialize(cached_data, cached_size, offset, &info_out)) { + NOUVEAU_ERR("shader deserialization failed:\n"); + goto out; +} + } + } + free(cached_data); + } + if (!shader_found) { + ret = nv50_ir_generate_code(info, &info_out); + if (ret) { + NOUVEAU_ERR("shader translation failed: %i\n", ret); + goto out; + } + if (disk_shader_cache) { + if (!nv50_ir_prog_info_out_serialize(&blob, &info_out)) { +NOUVEAU_ERR("shader serialization failed:\n"); +goto out; + } + disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL); + } } + blob_finish(&blob); prog->code = info_out.bin.code; prog->code_size = info_out.bin.codeSize; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 774c5648113..4327a89454b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -54,7 +54,8 @@ nvc0_program_vali
[Mesa-dev] [PATCH v3 4/8] nv50/ir: Add prog_info_out print
Adds a function for printing nv50_ir_prog_info_out structure in JSON-like format, which could be used in debugging. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 3 + .../drivers/nouveau/codegen/nv50_ir_print.cpp | 154 ++ 2 files changed, 157 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 591aa8f57e8..1bfaa8483ca 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -275,6 +275,9 @@ namespace nv50_ir } #endif +extern void +nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *); + /* Serialize a nv50_ir_prog_info_out structure and save it into blob */ extern bool MUST_CHECK nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 5dcbf3c3e0c..2c13bef5e1a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -22,6 +22,7 @@ #include "codegen/nv50_ir.h" #include "codegen/nv50_ir_target.h" +#include "codegen/nv50_ir_driver.h" #include @@ -852,3 +853,156 @@ Function::printLiveIntervals() const } } // namespace nv50_ir + +extern void +nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *info_out) +{ + int i; + + INFO("{\n"); + INFO(" \"target\":\"%d\",\n", info_out->target); + INFO(" \"type\":\"%d\",\n", info_out->type); + + // Bin + INFO(" \"bin\":{\n"); + INFO(" \"maxGPR\":\"%d\",\n", info_out->bin.maxGPR); + INFO(" \"tlsSpace\":\"%d\",\n", info_out->bin.tlsSpace); + INFO(" \"smemSize\":\"%d\",\n", info_out->bin.smemSize); + INFO(" \"codeSize\":\"%d\",\n", info_out->bin.codeSize); + INFO(" \"instructions\":\"%d\",\n", info_out->bin.instructions); + + // RelocInfo + INFO(" \"RelocInfo\":"); + if (!info_out->bin.relocData) { + 
INFO("\"NULL\",\n"); + } else { + nv50_ir::RelocInfo *reloc = (nv50_ir::RelocInfo *)info_out->bin.relocData; + INFO("{\n"); + INFO(" \"codePos\":\"%d\",\n", reloc->codePos); + INFO(" \"libPos\":\"%d\",\n", reloc->libPos); + INFO(" \"dataPos\":\"%d\",\n", reloc->dataPos); + INFO(" \"count\":\"%d\",\n", reloc->count); + INFO(" \"RelocEntry\":[\n"); + for (unsigned int i = 0; i < reloc->count; i++) { + INFO(" {\"data\":\"%d\",\t\"mask\":\"%d\",\t\"offset\":\"%d\",\t\"bitPos\":\"%d\",\t\"type\":\"%d\"}", + reloc->entry[i].data, reloc->entry[i].mask, reloc->entry[i].offset, reloc->entry[i].bitPos, reloc->entry[i].type + ); + } + INFO("\n"); + INFO(" ]\n"); + INFO(" },\n"); + } + + // FixupInfo + INFO(" \"FixupInfo\":"); + if (!info_out->bin.fixupData) { + INFO("\"NULL\"\n"); + } else { + nv50_ir::FixupInfo *fixup = (nv50_ir::FixupInfo *)info_out->bin.fixupData; + INFO("{\n"); + INFO(" \"count\":\"%d\"\n", fixup->count); + INFO(" \"FixupEntry\":[\n"); + for (unsigned int i = 0; i < fixup->count; i++) { + INFO(" {\"apply\":\"%p\",\t\"ipa\":\"%d\",\t\"reg\":\"%d\",\t\"loc\":\"%d\"}\n", + fixup->entry[i].apply, fixup->entry[i].ipa, fixup->entry[i].reg, fixup->entry[i].loc); + } + INFO("\n"); + INFO(" ]\n"); + INFO(" }\n"); + + INFO(" },\n"); + } + + if (info_out->numSysVals) { + INFO(" \"sv\":[\n"); + for (i = 0; i < info_out->numSysVals; i++) { + if (&(info_out->sv[i])) { +INFO(" {\"id\":\"%d\", \"sn\":\"%d\", \"si\":\"%d\"}\n", + info_out->sv[i].id, info_out->sv[i].sn, inf
[Mesa-dev] [PATCH v3 1/8] nv50/ir: remove symbol table support for compute shaders
From: Karol Herbst The initial plan was to use this for OpenCL kernels, but back then the plan was to convert from LLVM to TGSI. As it turns out, we didn't go that way. Right now for OpenCL we don't require supporting multiple entry points inside the same binary and if we want to support it later, we can add this back. Signed-off-by: Karol Herbst --- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 2 -- .../drivers/nouveau/codegen/nv50_ir_driver.h | 2 -- .../nouveau/codegen/nv50_ir_target.cpp| 23 --- .../drivers/nouveau/nv50/nv50_compute.c | 17 +- .../drivers/nouveau/nv50/nv50_program.c | 10 .../drivers/nouveau/nv50/nv50_program.h | 2 -- .../drivers/nouveau/nvc0/nvc0_compute.c | 2 +- .../drivers/nouveau/nvc0/nvc0_context.h | 2 -- .../drivers/nouveau/nvc0/nvc0_program.c | 21 - .../drivers/nouveau/nvc0/nvc0_program.h | 2 -- .../drivers/nouveau/nvc0/nve4_compute.c | 4 ++-- 11 files changed, 4 insertions(+), 83 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 296b79f5d49..d2200fc4ea9 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -1297,8 +1297,6 @@ public: const Target *getTarget() const { return target; } private: - void emitSymbolTable(struct nv50_ir_prog_info *); - Type progType; Target *target; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 322bdd02557..55cc4c609f0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -97,8 +97,6 @@ struct nv50_ir_prog_info const void *source; void *relocData; void *fixupData; - struct nv50_ir_prog_symbol *syms; - uint16_t numSyms; } bin; struct nv50_ir_varying sv[PIPE_MAX_SHADER_INPUTS]; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 
5c6d0570ae2..0af79e9d50a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -343,27 +343,6 @@ CodeEmitter::prepareEmission(BasicBlock *bb) func->binSize += bb->binSize; } -void -Program::emitSymbolTable(struct nv50_ir_prog_info *info) -{ - unsigned int n = 0, nMax = allFuncs.getSize(); - - info->bin.syms = - (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms)); - - for (ArrayList::Iterator fi = allFuncs.iterator(); -!fi.end(); -fi.next(), ++n) { - Function *f = (Function *)fi.get(); - assert(n < nMax); - - info->bin.syms[n].label = f->getLabel(); - info->bin.syms[n].offset = f->binPos; - } - - info->bin.numSyms = n; -} - bool Program::emitBinary(struct nv50_ir_prog_info *info) { @@ -403,8 +382,6 @@ Program::emitBinary(struct nv50_ir_prog_info *info) info->bin.relocData = emit->getRelocInfo(); info->bin.fixupData = emit->getFixupInfo(); - emitSymbolTable(info); - // the nvc0 driver will print the binary iself together with the header if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0) emit->printBinary(); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c index d781f6fd7d4..1a78a371405 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c @@ -225,21 +225,6 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) } } -static uint32_t -nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label) -{ - struct nv50_program *prog = nv50->compprog; - const struct nv50_ir_prog_symbol *syms = - (const struct nv50_ir_prog_symbol *)prog->cp.syms; - unsigned i; - - for (i = 0; i < prog->cp.num_syms; ++i) { - if (syms[i].label == label) - return prog->code_base + syms[i].offset; - } - return prog->code_base; /* no symbols or symbol not found */ -} - void nv50_launch_grid(struct pipe_context *pipe, const struct 
pipe_grid_info *info) { @@ -258,7 +243,7 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) nv50_compute_upload_input(nv50, info->input); BEGIN_NV04(push, NV50_CP(CP_START_ID), 1); - PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc)); + PUSH_DATA (push, cp->code_base); BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1); PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40)); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index c9d01e8cee7..58c0463f1a2 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++
[Mesa-dev] [RFC PATCH v2 6/6] nv50: Add shader disk caching
Adds shader disk caching for nv50 to avoid recompiling shaders every time. Shaders are saved into the disk_shader_cache from the nv50_screen structure. It serializes the input nv50_ir_prog_info to compute the hash key and also to do a byte compare between the original nv50_ir_prog_info and the one saved in the cache. If the keys match and the byte compare also reports that they are equal, the shaders are the same, and the compiled nv50_ir_prog_info_out from the cache can be used instead of compiling the input info. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/nv50/nv50_program.c | 276 +++--- .../drivers/nouveau/nv50/nv50_program.h | 2 + .../drivers/nouveau/nv50/nv50_shader_state.c | 4 +- src/gallium/drivers/nouveau/nv50/nv50_state.c | 1 + 4 files changed, 47 insertions(+), 236 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index b5e36cf488d..156ac286a7f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -37,108 +37,6 @@ bitcount4(const uint32_t val) return cnt[val & 0xf]; } -static int -nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out *info) -{ - struct nv50_program *prog = (struct nv50_program *)info->driverPriv; - unsigned i, n, c; - - n = 0; - for (i = 0; i < info->numInputs; ++i) { - prog->in[i].id = i; - prog->in[i].sn = info->in[i].sn; - prog->in[i].si = info->in[i].si; - prog->in[i].hw = n; - prog->in[i].mask = info->in[i].mask; - - prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32); - - for (c = 0; c < 4; ++c) - if (info->in[i].mask & (1 << c)) -info->in[i].slot[c] = n++; - - if (info->in[i].sn == TGSI_SEMANTIC_PRIMID) - prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID; - } - prog->in_nr = info->numInputs; - - for (i = 0; i < info->numSysVals; ++i) { - switch (info->sv[i].sn) { - case TGSI_SEMANTIC_INSTANCEID: - prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID; - continue; - 
case TGSI_SEMANTIC_VERTEXID: - prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; - prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; - continue; - default: - break; - } - } - - /* -* Corner case: VP has no inputs, but we will still need to submit data to -* draw it. HW will shout at us and won't draw anything if we don't enable -* any input, so let's just pretend it's the first one. -*/ - if (prog->vp.attrs[0] == 0 && - prog->vp.attrs[1] == 0 && - prog->vp.attrs[2] == 0) - prog->vp.attrs[0] |= 0xf; - - /* VertexID before InstanceID */ - if (info->io.vertexId < info->numSysVals) - info->sv[info->io.vertexId].slot[0] = n++; - if (info->io.instanceId < info->numSysVals) - info->sv[info->io.instanceId].slot[0] = n++; - - n = 0; - for (i = 0; i < info->numOutputs; ++i) { - switch (info->out[i].sn) { - case TGSI_SEMANTIC_PSIZE: - prog->vp.psiz = i; - break; - case TGSI_SEMANTIC_CLIPDIST: - prog->vp.clpd[info->out[i].si] = n; - break; - case TGSI_SEMANTIC_EDGEFLAG: - prog->vp.edgeflag = i; - break; - case TGSI_SEMANTIC_BCOLOR: - prog->vp.bfc[info->out[i].si] = i; - break; - case TGSI_SEMANTIC_LAYER: - prog->gp.has_layer = true; - prog->gp.layerid = n; - break; - case TGSI_SEMANTIC_VIEWPORT_INDEX: - prog->gp.has_viewport = true; - prog->gp.viewportid = n; - break; - default: - break; - } - prog->out[i].id = i; - prog->out[i].sn = info->out[i].sn; - prog->out[i].si = info->out[i].si; - prog->out[i].hw = n; - prog->out[i].mask = info->out[i].mask; - - for (c = 0; c < 4; ++c) - if (info->out[i].mask & (1 << c)) -info->out[i].slot[c] = n++; - } - prog->out_nr = info->numOutputs; - prog->max_out = n; - if (!prog->max_out) - prog->max_out = 1; - - if (prog->vp.psiz < info->numOutputs) - prog->vp.psiz = prog->out[prog->vp.psiz].hw; - - return 0; -} - static int nv50_vertprog_assign_slots_info(struct nv50_ir_prog_info_out *info) { @@ -263,115 +161,6 @@ nv50_vertprog_assign_slots_prog(struct nv50_ir_prog_info_out *info) return 0; } 
-static int -nv50_fragprog_assign_slots(struct nv50_ir_prog_info_out *info) -{ - struct nv50_program *prog = (struct nv50_program *)info->driverPriv; - unsigned i, n, m, c; - unsigned nvary; - unsi
[Mesa-dev] [RFC PATCH v2 1/6] nv50/ir: add nv50_ir_prog_info_out
From: Karol Herbst Split out the output relevant fields from the nv50_ir_prog_info struct in order to have a cleaner separation between the input and output of the compilation. Signed-off-by: Karol Herbst --- .../drivers/nouveau/codegen/nv50_ir.cpp | 49 ++-- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 9 +- .../drivers/nouveau/codegen/nv50_ir_driver.h | 117 +--- .../nouveau/codegen/nv50_ir_from_common.cpp | 14 +- .../nouveau/codegen/nv50_ir_from_common.h | 3 +- .../nouveau/codegen/nv50_ir_from_nir.cpp | 204 +++--- .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 256 +- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 6 +- .../nouveau/codegen/nv50_ir_target.cpp| 2 +- .../drivers/nouveau/codegen/nv50_ir_target.h | 5 +- .../nouveau/codegen/nv50_ir_target_nv50.cpp | 17 +- .../nouveau/codegen/nv50_ir_target_nv50.h | 3 +- .../drivers/nouveau/nouveau_compiler.c| 9 +- .../drivers/nouveau/nv50/nv50_program.c | 62 +++-- .../drivers/nouveau/nvc0/nvc0_program.c | 87 +++--- 15 files changed, 449 insertions(+), 394 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index c65853578f6..c2c5956874a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1241,15 +1241,18 @@ void Program::releaseValue(Value *value) extern "C" { static void -nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) +nv50_ir_init_prog_info(struct nv50_ir_prog_info *info, + struct nv50_ir_prog_info_out *info_out) { + info_out->target = info->target; + info_out->type = info->type; if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) { - info->prop.tp.domain = PIPE_PRIM_MAX; - info->prop.tp.outputPrim = PIPE_PRIM_MAX; + info_out->prop.tp.domain = PIPE_PRIM_MAX; + info_out->prop.tp.outputPrim = PIPE_PRIM_MAX; } if (info->type == PIPE_SHADER_GEOMETRY) { - info->prop.gp.instanceCount = 1; - info->prop.gp.maxVertices = 1; + info_out->prop.gp.instanceCount 
= 1; + info_out->prop.gp.maxVertices = 1; } if (info->type == PIPE_SHADER_COMPUTE) { info->prop.cp.numThreads[0] = @@ -1257,23 +1260,26 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) info->prop.cp.numThreads[2] = 1; } info->io.pointSize = 0xff; - info->io.instanceId = 0xff; - info->io.vertexId = 0xff; - info->io.edgeFlagIn = 0xff; - info->io.edgeFlagOut = 0xff; - info->io.fragDepth = 0xff; - info->io.sampleMask = 0xff; + info_out->bin.smemSize = info->bin.smemSize; + info_out->io.genUserClip = info->io.genUserClip; + info_out->io.instanceId = 0xff; + info_out->io.vertexId = 0xff; + info_out->io.edgeFlagIn = 0xff; + info_out->io.edgeFlagOut = 0xff; + info_out->io.fragDepth = 0xff; + info_out->io.sampleMask = 0xff; info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff; } int -nv50_ir_generate_code(struct nv50_ir_prog_info *info) +nv50_ir_generate_code(struct nv50_ir_prog_info *info, + struct nv50_ir_prog_info_out *info_out) { int ret = 0; nv50_ir::Program::Type type; - nv50_ir_init_prog_info(info); + nv50_ir_init_prog_info(info, info_out); #define PROG_TYPE_CASE(a, b) \ case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break @@ -1301,15 +1307,16 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) return -1; } prog->driver = info; + prog->driver_out = info_out; prog->dbgFlags = info->dbgFlags; prog->optLevel = info->optLevel; switch (info->bin.sourceRep) { case PIPE_SHADER_IR_NIR: - ret = prog->makeFromNIR(info) ? 0 : -2; + ret = prog->makeFromNIR(info, info_out) ? 0 : -2; break; case PIPE_SHADER_IR_TGSI: - ret = prog->makeFromTGSI(info) ? 0 : -2; + ret = prog->makeFromTGSI(info, info_out) ? 
0 : -2; break; default: ret = -1; @@ -1320,7 +1327,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) prog->print(); - targ->parseDriverInfo(info); + targ->parseDriverInfo(info, info_out); prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); prog->convertToSSA(); @@ -1342,7 +1349,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) prog->optimizePostRA(info->optLevel); - if (!prog->emitBinary(info)) { + if (!prog->emitBinary(info_out)) { ret = -5; goto out; } @@ -1350,10 +1357,10 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) out: INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret); - info->bin.maxGPR = prog->maxGPR; - info->bin.code = prog->code; - info->bin.codeSize = prog->binSize; - info->bin.tlsSpace =
[Mesa-dev] [RFC PATCH v2 4/6] nv50/ir: Add nv50_ir_prog_info serialize
Adds a function for serializing a nv50_ir_prog_info structure, which is needed for shader caching. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 4 + .../nouveau/codegen/nv50_ir_serialize.cpp | 81 +++ .../drivers/nouveau/nvc0/nvc0_context.h | 1 + .../drivers/nouveau/nvc0/nvc0_program.c | 43 -- .../drivers/nouveau/nvc0/nvc0_shader_state.c | 3 +- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 2 + 6 files changed, 128 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 10ae5cbe420..3728470ab45 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -278,6 +278,10 @@ namespace nv50_ir extern void nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *); +/* Serialize a nv50_ir_prog_info structure and save it into blob */ +extern bool +nv50_ir_prog_info_serialize(struct blob *, struct nv50_ir_prog_info *); + /* Serialize a nv50_ir_prog_info_out structure and save it into blob */ extern bool nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp index 5671483bd4e..b640cb67503 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp @@ -17,6 +17,87 @@ enum FixupApplyFunc { FLIP_GM107 = 7 }; +extern bool +nv50_ir_prog_info_serialize(struct blob *blob, struct nv50_ir_prog_info *info) +{ + blob_write_uint16(blob, info->target); + blob_write_uint8(blob, info->type); + blob_write_uint8(blob, info->optLevel); + blob_write_uint8(blob, info->dbgFlags); + blob_write_uint8(blob, info->omitLineNum); + blob_write_uint32(blob, info->bin.smemSize); + blob_write_uint16(blob, info->bin.maxOutput); + blob_write_uint8(blob, info->bin.sourceRep); + + 
switch(info->bin.sourceRep) { + case PIPE_SHADER_IR_TGSI: { + struct tgsi_token *tokens = (struct tgsi_token *)info->bin.source; + unsigned int num_tokens = tgsi_num_tokens(tokens); + + blob_write_uint32(blob, num_tokens); + blob_write_bytes(blob, tokens, num_tokens * sizeof(struct tgsi_token)); + break; + } + case PIPE_SHADER_IR_NIR: { + struct nir_shader *nir = (struct nir_shader *)info->bin.source; + nir_serialize(blob, nir, false); + break; + } + default: + assert(!"unhandled info->bin.sourceRep"); + return false; + } + + blob_write_uint16(blob, info->immd.bufSize); + blob_write_bytes(blob, info->immd.buf, info->immd.bufSize * sizeof(*info->immd.buf)); + blob_write_uint16(blob, info->immd.count); + blob_write_bytes(blob, info->immd.data, info->immd.count * sizeof(*info->immd.data)); + blob_write_bytes(blob, info->immd.type, info->immd.count * 16); // for each vec4 (128 bit) + + switch (info->type) { + case PIPE_SHADER_VERTEX: + blob_write_bytes(blob, info->prop.vp.inputMask, + 4 * sizeof(*info->prop.vp.inputMask)); /* array of size 4 */ + break; + case PIPE_SHADER_TESS_CTRL: + blob_write_uint32(blob, info->prop.cp.inputOffset); + blob_write_uint32(blob, info->prop.cp.sharedOffset); + blob_write_uint32(blob, info->prop.cp.gridInfoBase); + blob_write_bytes(blob, info->prop.cp.numThreads, + 3 * sizeof(*info->prop.cp.numThreads)); /* array of size 3 */ + case PIPE_SHADER_GEOMETRY: + blob_write_uint8(blob, info->prop.gp.inputPrim); + break; + case PIPE_SHADER_FRAGMENT: + blob_write_uint8(blob, info->prop.fp.persampleInvocation); + break; + default: + break; + } + + blob_write_uint8(blob, info->io.auxCBSlot); + blob_write_uint16(blob, info->io.ucpBase); + blob_write_uint16(blob, info->io.drawInfoBase); + blob_write_uint16(blob, info->io.alphaRefBase); + blob_write_uint8(blob, info->io.pointSize); + blob_write_uint8(blob, info->io.viewportId); + blob_write_bytes(blob, info->io.backFaceColor, 2 * sizeof(*info->io.backFaceColor)); + blob_write_uint8(blob, 
info->io.mul_zero_wins); + blob_write_uint8(blob, info->io.nv50styleSurfaces); + blob_write_uint16(blob, info->io.texBindBase); + blob_write_uint16(blob, info->io.fbtexBindBase); + blob_write_uint16(blob, info->io.suInfoBase); + blob_write_uint16(blob, info->io.bindlessBase); + blob_write_uint16(blob, info->io.bufInfoBase); + blob_write_uint16(blob, info->io.sampleInfoBase); + blob_write_uint8(blob, info->io.msInfoCBSlot); + blob_wr
[Mesa-dev] [RFC PATCH v2 2/6] nv50/ir: Add nv50_ir_prog_info_out serialize and deserialize
Adds functions for serializing and deserializing nv50_ir_prog_info_out structure, which are needed for shader caching. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 44 .../nouveau/codegen/nv50_ir_emit_gk110.cpp| 14 +- .../nouveau/codegen/nv50_ir_emit_gm107.cpp| 14 +- .../nouveau/codegen/nv50_ir_emit_nv50.cpp | 6 +- .../nouveau/codegen/nv50_ir_emit_nvc0.cpp | 14 +- .../nouveau/codegen/nv50_ir_serialize.cpp | 196 ++ src/gallium/drivers/nouveau/meson.build | 1 + 7 files changed, 265 insertions(+), 24 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index dab1ce030cb..eea32133ccf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -25,6 +25,7 @@ #include "pipe/p_shader_tokens.h" +#include "util/blob.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" @@ -242,6 +243,49 @@ nv50_ir_apply_fixups(void *fixupData, uint32_t *code, extern void nv50_ir_get_target_library(uint32_t chipset, const uint32_t **code, uint32_t *size); + +#ifdef __cplusplus +namespace nv50_ir +{ + class FixupEntry; + class FixupData; + + void + gk110_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gm107_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + nv50_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, +const nv50_ir::FixupData& data); + void + nvc0_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, +const nv50_ir::FixupData& data); + void + gk110_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gm107_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + 
nvc0_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + +} +#endif + +/* Serialize a nv50_ir_prog_info_out structure and save it into blob */ +extern bool +nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); + +/* Deserialize from data and save into a nv50_ir_prog_info_out structure + * using a pointer. Size is a total size of the serialized data. + * Offset points to where info_out in data is located. */ +extern bool +nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset, + struct nv50_ir_prog_info_out *); + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 2118c3153f7..e651d7fdcb0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1209,8 +1209,8 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i) } } -static void -selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) { int loc = entry->loc; if (data.force_persample_interp) @@ -1227,7 +1227,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i) code[1] |= 1 << 13; if (i->subOp == 1) { - addInterp(0, 0, selpFlip); + addInterp(0, 0, gk110_selpFlip); } } @@ -2042,8 +2042,8 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i) code[1] |= (i->ipa & 0xc) << (19 - 2); } -static void -interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const FixupData& data) { int ipa = entry->ipa; int reg = entry->reg; @@ -2078,10 +2078,10 @@ CodeEmitterGK110::emitINTERP(const Instruction *i) if (i->op == OP_PINTERP) { srcId(i->src(1), 23); - addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); + addInterp(i->ipa, SDATA(i->src(1)).id, 
gk110_interpApply); } else { code[0] |= 0xff << 23; - addInterp(i->ipa, 0xff, interpApply); + addInterp(i->ipa, 0xff, gk110_interpApply); } srcId(i->src(0).getIndirect(0), 10); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index e244bd0d610..4970f14cb33 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_e
[Mesa-dev] [RFC PATCH v2 3/6] nv50/ir: Add prog_info_out print
Adds a function for printing nv50_ir_prog_info_out structure in JSON-like format, which could be used in debugging. Signed-off-by: Mark Menzynski --- .../nouveau/codegen/.nv50_ir_from_nir.cpp.swp | Bin 0 -> 16384 bytes .../drivers/nouveau/codegen/nv50_ir_driver.h | 3 + .../drivers/nouveau/codegen/nv50_ir_print.cpp | 154 ++ 3 files changed, 157 insertions(+) create mode 100644 src/gallium/drivers/nouveau/codegen/.nv50_ir_from_nir.cpp.swp diff --git a/src/gallium/drivers/nouveau/codegen/.nv50_ir_from_nir.cpp.swp b/src/gallium/drivers/nouveau/codegen/.nv50_ir_from_nir.cpp.swp new file mode 100644 index ..c405065a5df6c33ee4f0439c30c474d446b87730 GIT binary patch literal 16384 zcmeHOYmgjO6>bBHf+(U`l!A4$5!hXaojg#f-DJt^>}FK^R)263; z=*Mn0F;N7|@&{n4WtEm?DVBv`4+)c+9$@lpCm@dx4q4T2IBQPl6;dwU+8O|n1y zW%X9R>`b3~(IOp`;=FU}piQPE0Tf@&~ns(*=bGKb~)t9u_-J)sTF87XZcp;I_ z_2c@M>2~}uni8LEziT?)CLK;B4D^Ft@{1BZwCE)UIo7+CqX>AFtb(*q#)B~L z`SBVzb`o9(Z9`*cGivFUa->aT)r~#63{-x-L75yS5P(uHwqTi|LL5lu@qTi?J&!*`A zQ1n32-+zJpknn$A(Qi}q(<%Dj6@5+7YZs38|C^$}OVPidqCcnT#})mWmyY&7t>{VJ zSw(kL|I1!B+W)Vq@i$WRzbN`URsUb4=+7v6qW`XoM)^OT;(uR?{%1u$r1+mo z(VtRuuIQzgkMcjI=*hf#Fh&1UivF5cjQ0PdqJK#7Ka!$9sp$Rreer1jKPY-4Upqzr zy`m@c=iwCncd7AbFB#?kTSdQB$#*hEe?rmM6@AAmNBgVksO?wuPp0VVarP^-KxTo= z0+|Ie3uG3^ERb0svp{Bn%mQb%fN5(QrE1@$L>TY?ll`JqnYTz_-s*}Ly zfxCgbfDTXvbRY*@16*S_cZVu;Bnw2@J--Tz{i320VZ$@a0u8B+yu-37XsUW zN0Dbf0z3?S4)`qa31A*50yh9}0k#1@grV*N3c!WH1;EdcN8bxLz}z#D-dBf0w^Fb6!3y2a0dPXl#e1~>)Zd=DTWeTV#{e%M)l zn!fKcC#>UEh8eJZtYx|(pP13mJa)Wj$}PZ64{K?F8J4#!1KWILdU|VFbddY zPPDxdO~b)QE5}w%H|A{e+8nXV=b0IXahKc7k2*Y9cS5e7_s4fIjAceVWKqYA@E>QU zZ3jFI9j^^BSt#Vpu{CbpFiIj;XO$Q?Muwyu2qqPUWKk#Z<90`kD5dV1f!XB|52lom zdP`7Se(c$7O=#9JL+1GrGnwVX5~de1bc=FBY+$>P1w4uakK2j5WVP6e9CTvie5yUg zB7S6oIUe%^n+L4r2jD0!varY(%wach^+TSBW0@g`wL>35I=B|Y);Y$xBZixpgUd6v z13OANL$GBbhj-AJwH%k*c;qQ*q7Lkee_dS4aW|L=A$s8G95qQm8ovoIG+m7Ev+%If zgPd|QpqmHXTYO!yqRBy~#_>N@OS9PfyJRmx958tR`OdkPJbRvOtI1$$5*rah$6exSL?is8SYpkqxXGi_`P!oVDm6yld{2_iLGM}*S`)= zj<9OdqomQMYLJUzg$( 
zO{>F~u-f`exEFU>+ciwf!p>vd5u2{la(ICD;BiHo9hSZ;TDGjWwQQ zL;B6KiW3M`m&~5nRL|Er*}_CmA#{0(cdM-y)^E5a4zvi*aON!2lRaUpk|+-dLFRcY z-7Spl>1k}kxR|$BHl3VJCg_aT!?ir@KhtC{;ejCb#+Chwa`N(Ed^zyj*kx^+%& zk;^5yz1KqnOvjclny4#%{fCAh%SqLXo33L`YsWOp#Tmo;Bd}>EX@f^azuU#$ryVEJ zTUg_uAp(if3w<41qHm zkEAMtzR)B;A`-PBkq1rE4a3!>m}dJ&%my-_LE>#`ABF4P)& zaiv_SvE`N8ausqHD)X#Tt>~478Z1y+DpeXd_S!osDSbecjE)y?(X}PSI=5wGH z%PaF_usN8jQf)w#CB0FahcZHP7`9fZG;|VUfi0D4#YKEgjFRY=xPa+(wp^>O>hs_* zR)CA@W3U{oO`cG^4iP6(^JcvQ5lZh^u903sOJayy&^WewxEd{DyclzV=wRJwK2E zrizj3ek)ox1BxGEX9}#F{u-Tmy^X+WcOnX89c)S_qFWIXVgYm=1m1`N6)Eh{aEJ!g zzF^}6xDmsII|Y(0#JWz{LEMLc(MHVZBTS8(@5Wg{Pbgq(`7YKP#q`)kZAS=-vjX~f zc3uRlLwd;=FBu^xCG?q)4*@rBe^VR{*=3=L%-yDHM zP96_oC(&9Yf9(oC0P=mvyh<1r>A_-eZph-s%@)L9w@qA9%hEz+)E`sSu z1k=Qfq>yndOcg4p2%T_p`4(HmqhL;P+%N5wRmtps~l2!N(?qXv+g9fgLsj-rQg~ zQ$29Ou@!JmV_6-Fl8~187-~U!#r|Z#f$(-#gCvVc!*tJ@((;oU z<^Q)Mw|x(CVaosatK9OZ$oGE^KPR{&2U@BbEX0$2v#44g*p{}}Kq;48qL zzzlE+a53;WY5|V|KLWlDd;~ZFYyd|9su#Qs7z3^bt^%G$P2dN>L%?T%yMZpy0ouSp zfNBU0U>~p-7zeHc#(<|#Bls%tW#A}q0HFH86@Ugj0i7QKz5r0Y;A6l?0kXw0Ap6NI zkXazJKxTo=0+|Ie3;eGwaGdJrgX~j}Qg|Ai}3w zCDLhC144^Ifu>It0*E>g#n3I}42k4Ys^J0=K7+BW<%*io42nS*=*%Bg zusNf`B#TM?F*Q}2nx5{Ohxr2jinOBF#?RK7O$w1U(tNmxqDEmTrU(bd9=XsSIC9j1 zcq;(UXb@Udr=;+zMkQX%Bt8%pUvuzhvE!}S6Pd?^Fg;lbZx1+XnnptDHxbsIAYmkD zYK%f1s$R(KPl?gJ-Hfr`%WZQnLH#Qh=yCl^``E4Q*H!;Xoc=rOl z8)5KQiZR~Jgq+;i=Smg`*E#Qz>QV$Ovjb+(G~1LLyYvEsl0QmyrSs{fjnEd1jO9YT zZmdeX5id2HWkm{wd8Ax$sbsua`+#?mgrgXQ+WVRrIP{tYFLj6zi>A zD-<+n{ literal 0 HcmV?d1 diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index eea32133ccf..10ae5cbe420 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -275,6 +275,9 @@ namespace nv50_ir } #endif +extern void +nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *); + /* Serialize a nv50_ir_prog_info_out structure and save it into blob */ extern bool 
nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 5dcbf3c3e0c..2c13bef5e1a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -22,6 +22,7 @@ #include "codegen/nv50_ir.h" #include "codegen/nv50_ir_target.h" +#include "codegen/nv50_ir_driver.h" #include @@ -852,3 +853,156 @@ Function::printLiveIntervals() const } } // namespace nv50_ir + +extern void +nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *info_out) +{ + int i; + + INFO("{\n"); + INFO(" \&q
[Mesa-dev] [RFC PATCH v2 5/6] nv50: Add separate functions for varying bits
This separation will be needed for shader disk caching. The reason is that when loading shaders from the cache, the data in the info structure is already loaded. That means the varying bits for info are needed only when compiling shaders, not when loading from the cache. The varying bits for prog are needed in both cases. Unfortunately, I don't know how most of the code works; I have separated this manually, only by looking at the original code. That means that this patch is experimental. Together with the following commit it works (there seem to be no regressions at all in VK-GL-CTS [openglcts/data/mustpass/gl/khronos_mustpass/4.6.1.x/gl33-master.txt] and all benchmarks behaved normally). Unfortunately, I cannot test in Piglit because of technical problems, so there might still be some work needed. I am mainly asking for help with the function names, looking for bugs and pointing out useless code. I will be glad for every review. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/nv50/nv50_program.c | 344 ++ 1 file changed, 344 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 924120eecdf..b5e36cf488d 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -139,6 +139,130 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out *info) return 0; } +static int +nv50_vertprog_assign_slots_info(struct nv50_ir_prog_info_out *info) +{ + unsigned i, n, c; + + n = 0; + for (i = 0; i < info->numInputs; ++i) { + for (c = 0; c < 4; ++c) + if (info->in[i].mask & (1 << c)) +info->in[i].slot[c] = n++; + } + + /* VertexID before InstanceID */ + if (info->io.vertexId < info->numSysVals) + info->sv[info->io.vertexId].slot[0] = n++; + if (info->io.instanceId < info->numSysVals) + info->sv[info->io.instanceId].slot[0] = n++; + + n = 0; + for (i = 0; i < info->numOutputs; ++i) { + for (c = 0; c < 4; ++c) + if (info->out[i].mask & (1 << c)) 
+info->out[i].slot[c] = n++; + } + + return 0; +} + +static int +nv50_vertprog_assign_slots_prog(struct nv50_ir_prog_info_out *info) +{ + struct nv50_program *prog = (struct nv50_program *)info->driverPriv; + unsigned i, n, c; + + n = 0; + for (i = 0; i < info->numInputs; ++i) { + prog->in[i].id = i; + prog->in[i].sn = info->in[i].sn; + prog->in[i].si = info->in[i].si; + prog->in[i].hw = n; + prog->in[i].mask = info->in[i].mask; + + prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32); + + for (c = 0; c < 4; ++c) + if (info->in[i].mask & (1 << c)) +n++; + + if (info->in[i].sn == TGSI_SEMANTIC_PRIMID) + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID; + } + prog->in_nr = info->numInputs; + + for (i = 0; i < info->numSysVals; ++i) { + switch (info->sv[i].sn) { + case TGSI_SEMANTIC_INSTANCEID: + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID; + continue; + case TGSI_SEMANTIC_VERTEXID: + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; + continue; + default: + break; + } + } + + /* +* Corner case: VP has no inputs, but we will still need to submit data to +* draw it. HW will shout at us and won't draw anything if we don't enable +* any input, so let's just pretend it's the first one. 
+*/ + if (prog->vp.attrs[0] == 0 && + prog->vp.attrs[1] == 0 && + prog->vp.attrs[2] == 0) + prog->vp.attrs[0] |= 0xf; + + n = 0; + for (i = 0; i < info->numOutputs; ++i) { + switch (info->out[i].sn) { + case TGSI_SEMANTIC_PSIZE: + prog->vp.psiz = i; + break; + case TGSI_SEMANTIC_CLIPDIST: + prog->vp.clpd[info->out[i].si] = n; + break; + case TGSI_SEMANTIC_EDGEFLAG: + prog->vp.edgeflag = i; + break; + case TGSI_SEMANTIC_BCOLOR: + prog->vp.bfc[info->out[i].si] = i; + break; + case TGSI_SEMANTIC_LAYER: + prog->gp.has_layer = true; + prog->gp.layerid = n; + break; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + prog->gp.has_viewport = true; + prog->gp.viewportid = n; + break; + default: + break; + } + prog->out[i].id = i; + prog->out[i].sn = info->out[i].sn; + prog->out[i].si = info->out[i].si; + prog->out[i].hw = n; + prog->out[i].mask = info->out[i].mask; + + for (c = 0; c < 4; ++c) +
[Mesa-dev] [RFC PATCH 2/2] nv50: Add shader disk caching
Adds shader disk caching for nv50 to avoid recompiling shaders every time. Shaders are saved into the disk_shader_cache from the nv50_screen structure. It can be disabled with MESA_GLSL_CACHE_DISABLE=1. It serializes the input nv50_ir_prog_info to compute the hash key and also to do a byte compare between the original nv50_ir_prog_info and the one saved in the cache. If the keys match and the byte compare also reports that they are equal, the shaders are the same, and the compiled nv50_ir_prog_info_out from the cache can be used instead of compiling the input info. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/nv50/nv50_program.c | 55 --- .../drivers/nouveau/nv50/nv50_program.h | 2 + .../drivers/nouveau/nv50/nv50_shader_state.c | 4 +- src/gallium/drivers/nouveau/nv50/nv50_state.c | 1 + 4 files changed, 52 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index bf63b20f613..0b85267f36f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -667,10 +667,21 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info_out *info, bool nv50_program_translate(struct nv50_program *prog, uint16_t chipset, + struct disk_cache *disk_shader_cache, struct pipe_debug_callback *debug) { + struct blob blob; struct nv50_ir_prog_info *info; - int i, ret; + struct nv50_ir_prog_info_out info_out = {}; + + void *cached_data = NULL; + size_t cached_size; + bool shader_found = false; + + int i; + int ret = 0; + cache_key key; + const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 
0x40 : 0x80; info = CALLOC_STRUCT(nv50_ir_prog_info); @@ -704,7 +715,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, info->io.msInfoCBSlot = 15; info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET; - info->assignSlots = nv50_program_assign_varying_slots; + info->assignSlots = nv50_program_assign_varying_slots_info; prog->vp.bfc[0] = 0xff; prog->vp.bfc[1] = 0xff; @@ -726,16 +737,42 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, info->optLevel = 3; #endif - struct nv50_ir_prog_info_out info_out = {}; /* these fields might be overwritten by the compiler */ - info_out.bin.smemSize = prog->cp.smem_size; - info_out.io.genUserClip = prog->vp.clpd_nr; + info->bin.smemSize = prog->cp.smem_size; + info->io.genUserClip = prog->vp.clpd_nr; + + blob_init(&blob); + + if (disk_shader_cache) { + nv50_ir_prog_info_serialize(&blob, info); + disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key); + cached_data = disk_cache_get(disk_shader_cache, key, &cached_size); + + if (cached_data && cached_size >= blob.size) { // blob.size is the size of serialized "info" + if (memcmp(cached_data, blob.data, blob.size) == 0) { +shader_found = true; +/* Blob contains only "info". 
In disk cache, "info_out" comes right after it */ +size_t offset = blob.size; +nv50_ir_prog_info_out_deserialize(cached_data, cached_size, offset, &info_out); + } + } + free(cached_data); + } info_out.driverPriv = prog; - ret = nv50_ir_generate_code(info, &info_out); - if (ret) { - NOUVEAU_ERR("shader translation failed: %i\n", ret); - goto out; + + if (!shader_found) { + ret = nv50_ir_generate_code(info, &info_out); + if (ret) { + NOUVEAU_ERR("shader translation failed: %i\n", ret); + goto out; + } + if (disk_shader_cache) { + nv50_ir_prog_info_out_serialize(&blob, &info_out); + disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL); + } } + blob_finish(&blob); + nv50_program_assign_varying_slots_prog(&info_out); prog->code = info_out.bin.code; prog->code_size = info_out.bin.codeSize; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index 1a89e0d5067..528e1d01fa1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -116,7 +116,9 @@ struct nv50_program { struct nv50_stream_output_state *so; }; +struct disk_cache; bool nv50_program_translate(struct nv50_program *, uint16_t chipset, +struct disk_cache *, struct pipe_debug_callback *); bool nv50_program_upload_code(struct nv50_context *, struct nv50_program *); void nv50_program_destroy(struct nv50_context *, struct nv50_program *); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index 2cbbdc0cc35..65891108464 100644 --- a/src/gallium/drivers
[Mesa-dev] [RFC PATCH 1/2] nv50: Add separate functions for varying bits
This separation will be needed for shader disk caching. The reason for it is that when loading shaders from cache, data in info structure already gets loaded. That means varying bits for info is needed only when compiling shaders and not needed when loading from cache. Varying bits for prog are needed in both cases. Unfortunately, I don't know how most of the code works, I have separated this manually, only by looking at the original code. That means that this patch is experimental. Together with following commit it works (there seem to be no regressions at all in VK-GL-CTS [openglcts/data/mustpass/gl/khronos_mustpass/4.6.1.x/gl33-master.txt] and all benchmarks behaved normally). Unfortunately, I cannot test in Piglit because of technical problems, so there might be still some work needed. I am mainly asking to help with the function names, look for bugs and pointing out useless code. I will be glad for every review. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/nv50/nv50_program.c | 344 ++ 1 file changed, 344 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index a3f3054cbaa..bf63b20f613 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -139,6 +139,130 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out *info) return 0; } +static int +nv50_vertprog_assign_slots_info(struct nv50_ir_prog_info_out *info) +{ + unsigned i, n, c; + + n = 0; + for (i = 0; i < info->numInputs; ++i) { + for (c = 0; c < 4; ++c) + if (info->in[i].mask & (1 << c)) +info->in[i].slot[c] = n++; + } + + /* VertexID before InstanceID */ + if (info->io.vertexId < info->numSysVals) + info->sv[info->io.vertexId].slot[0] = n++; + if (info->io.instanceId < info->numSysVals) + info->sv[info->io.instanceId].slot[0] = n++; + + n = 0; + for (i = 0; i < info->numOutputs; ++i) { + for (c = 0; c < 4; ++c) + if (info->out[i].mask & (1 << c)) 
+info->out[i].slot[c] = n++; + } + + return 0; +} + +static int +nv50_vertprog_assign_slots_prog(struct nv50_ir_prog_info_out *info) +{ + struct nv50_program *prog = (struct nv50_program *)info->driverPriv; + unsigned i, n, c; + + n = 0; + for (i = 0; i < info->numInputs; ++i) { + prog->in[i].id = i; + prog->in[i].sn = info->in[i].sn; + prog->in[i].si = info->in[i].si; + prog->in[i].hw = n; + prog->in[i].mask = info->in[i].mask; + + prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32); + + for (c = 0; c < 4; ++c) + if (info->in[i].mask & (1 << c)) +n++; + + if (info->in[i].sn == TGSI_SEMANTIC_PRIMID) + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID; + } + prog->in_nr = info->numInputs; + + for (i = 0; i < info->numSysVals; ++i) { + switch (info->sv[i].sn) { + case TGSI_SEMANTIC_INSTANCEID: + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID; + continue; + case TGSI_SEMANTIC_VERTEXID: + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; + continue; + default: + break; + } + } + + /* +* Corner case: VP has no inputs, but we will still need to submit data to +* draw it. HW will shout at us and won't draw anything if we don't enable +* any input, so let's just pretend it's the first one. 
+*/ + if (prog->vp.attrs[0] == 0 && + prog->vp.attrs[1] == 0 && + prog->vp.attrs[2] == 0) + prog->vp.attrs[0] |= 0xf; + + n = 0; + for (i = 0; i < info->numOutputs; ++i) { + switch (info->out[i].sn) { + case TGSI_SEMANTIC_PSIZE: + prog->vp.psiz = i; + break; + case TGSI_SEMANTIC_CLIPDIST: + prog->vp.clpd[info->out[i].si] = n; + break; + case TGSI_SEMANTIC_EDGEFLAG: + prog->vp.edgeflag = i; + break; + case TGSI_SEMANTIC_BCOLOR: + prog->vp.bfc[info->out[i].si] = i; + break; + case TGSI_SEMANTIC_LAYER: + prog->gp.has_layer = true; + prog->gp.layerid = n; + break; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + prog->gp.has_viewport = true; + prog->gp.viewportid = n; + break; + default: + break; + } + prog->out[i].id = i; + prog->out[i].sn = info->out[i].sn; + prog->out[i].si = info->out[i].si; + prog->out[i].hw = n; + prog->out[i].mask = info->out[i].mask; + + for (c = 0; c < 4;
[Mesa-dev] [PATCH v2 6/7] tgsi/util: Change boolean for bool
I was getting errors with "boolean" when compiling. This patch changes boolean to bool from <stdbool.h>. Signed-off-by: Mark Menzynski --- src/gallium/auxiliary/tgsi/tgsi_util.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_util.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 1e5582ba273..e1b604cff0e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -537,7 +537,7 @@ tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type tgsi_tex) } -boolean +bool tgsi_is_shadow_target(enum tgsi_texture_type target) { switch (target) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h index 686b90f467e..6dc576b1a00 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -28,6 +28,7 @@ #ifndef TGSI_UTIL_H #define TGSI_UTIL_H +#include <stdbool.h> #include "pipe/p_shader_tokens.h" #if defined __cplusplus @@ -84,11 +85,11 @@ tgsi_util_get_texture_coord_dim(enum tgsi_texture_type tgsi_tex); int tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type tgsi_tex); -boolean +bool tgsi_is_shadow_target(enum tgsi_texture_type target); -static inline boolean +static inline bool tgsi_is_msaa_target(enum tgsi_texture_type target) { return (target == TGSI_TEXTURE_2D_MSAA || -- 2.21.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 2/7] util/blob: Add overwrite function for uint8
The overwrite function for this type was missing and I needed it for my project. Signed-off-by: Mark Menzynski --- src/util/blob.c | 9 + src/util/blob.h | 15 +++ 2 files changed, 24 insertions(+) diff --git a/src/util/blob.c b/src/util/blob.c index 94d5a9dea74..5bf4b924c91 100644 --- a/src/util/blob.c +++ b/src/util/blob.c @@ -214,6 +214,15 @@ BLOB_WRITE_TYPE(blob_write_intptr, intptr_t) #define ASSERT_ALIGNED(_offset, _align) \ assert(ALIGN((_offset), (_align)) == (_offset)) +bool +blob_overwrite_uint8 (struct blob *blob, + size_t offset, + uint8_t value) +{ + ASSERT_ALIGNED(offset, sizeof(value)); + return blob_overwrite_bytes(blob, offset, &value, sizeof(value)); +} + bool blob_overwrite_uint32 (struct blob *blob, size_t offset, diff --git a/src/util/blob.h b/src/util/blob.h index 9113331254a..e1e156eb43f 100644 --- a/src/util/blob.h +++ b/src/util/blob.h @@ -183,6 +183,21 @@ blob_overwrite_bytes(struct blob *blob, bool blob_write_uint8(struct blob *blob, uint8_t value); +/** + * Overwrite a uint8_t previously written to the blob. + * + * Writes a uint8_t value to an existing portion of the blob at an offset of + * \offset. This data range must have previously been written to the blob by + * one of the blob_write_* calls. + * + * \return True unless the requested position or position+to_write lie outside + * the current blob's size. + */ +bool +blob_overwrite_uint8(struct blob *blob, + size_t offset, + uint8_t value); + /** * Add a uint16_t to a blob. * -- 2.21.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 4/7] nv50/ir: Add prog_info_out print
Adds a function for printing nv50_ir_prog_info_out structure in JSON-like format, which could be used in debugging. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 3 + .../drivers/nouveau/codegen/nv50_ir_print.cpp | 153 ++ 2 files changed, 156 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index eea32133ccf..10ae5cbe420 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -275,6 +275,9 @@ namespace nv50_ir } #endif +extern void +nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *); + /* Serialize a nv50_ir_prog_info_out structure and save it into blob */ extern bool nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 5dcbf3c3e0c..4877047c0ec 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -22,6 +22,7 @@ #include "codegen/nv50_ir.h" #include "codegen/nv50_ir_target.h" +#include "codegen/nv50_ir_driver.h" #include @@ -852,3 +853,155 @@ Function::printLiveIntervals() const } } // namespace nv50_ir + +extern void +nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *info_out) +{ + int i; + + INFO("{\n"); + INFO(" \"target\":\"%d\",\n", info_out->target); + INFO(" \"type\":\"%d\",\n", info_out->type); + + // Bin + INFO(" \"bin\":{\n"); + INFO(" \"maxGPR\":\"%d\",\n", info_out->bin.maxGPR); + INFO(" \"tlsSpace\":\"%d\",\n", info_out->bin.tlsSpace); + INFO(" \"smemSize\":\"%d\",\n", info_out->bin.smemSize); + INFO(" \"codeSize\":\"%d\",\n", info_out->bin.codeSize); + INFO(" \"instructions\":\"%d\",\n", info_out->bin.instructions); + + // RelocInfo + INFO(" \"RelocInfo\":"); + if (!info_out->bin.relocData) { + INFO("\"NULL\",\n"); + } 
else { + nv50_ir::RelocInfo *reloc = (nv50_ir::RelocInfo *)info_out->bin.relocData; + INFO("{\n"); + INFO(" \"codePos\":\"%d\",\n", reloc->codePos); + INFO(" \"libPos\":\"%d\",\n", reloc->libPos); + INFO(" \"dataPos\":\"%d\",\n", reloc->dataPos); + INFO(" \"count\":\"%d\",\n", reloc->count); + INFO(" \"RelocEntry\":[\n"); + for (unsigned int i = 0; i < reloc->count; i++) { + INFO(" {\"data\":\"%d\",\t\"mask\":\"%d\",\t\"offset\":\"%d\",\t\"bitPos\":\"%d\",\t\"type\":\"%d\"}", + reloc->entry[i].data, reloc->entry[i].mask, reloc->entry[i].offset, reloc->entry[i].bitPos, reloc->entry[i].type + ); + } + INFO("\n"); + INFO(" ]\n"); + INFO(" },\n"); + } + + // FixupInfo + INFO(" \"FixupInfo\":"); + if (!info_out->bin.fixupData) { + INFO("\"NULL\"\n"); + } else { + nv50_ir::FixupInfo *fixup = (nv50_ir::FixupInfo *)info_out->bin.fixupData; + INFO("{\n"); + INFO(" \"count\":\"%d\"\n", fixup->count); + INFO(" \"FixupEntry\":[\n"); + for (unsigned int i = 0; i < fixup->count; i++) { + INFO(" {\"apply\":\"%p\",\t\"ipa\":\"%d\",\t\"reg\":\"%d\",\t\"loc\":\"%d\"}", + fixup->entry[i].apply, fixup->entry[i].ipa, fixup->entry[i].reg, fixup->entry[i].loc); + } + INFO("\n"); + INFO(" ]\n"); + INFO(" }\n"); + + INFO(" },\n"); + } + + if (info_out->numSysVals) { + INFO(" \"sv\":[\n"); + for (i = 0; i < info_out->numSysVals; i++) { + if (&(info_out->sv[i])) { +INFO(" {\"id\":\"%d\", \"sn\":\"%d\", \"si\":\"%d\"}", + info_out->sv[i].id, info_out->sv[i].sn, info_out->sv
[Mesa-dev] [PATCH v2 7/7] nvc0: Add shader disk caching
Adds shader disk caching for nvc0 so that shaders do not have to be recompiled every time. Shaders are saved into the disk_shader_cache from the nvc0_screen structure. The input nv50_ir_prog_info is serialized to compute the hash key and also to do a byte compare between the original nv50_ir_prog_info and the one saved in the cache. If the keys match and the byte compare also finds them equal, the shaders are the same, and the compiled nv50_ir_prog_info_out from the cache can be used instead of compiling the input info. This seems to significantly improve loading times; these are the results from running a bunch of shaders: cache off: real 2m58.574s, user 21m34.018s, sys 0m8.055s; cache on, first run: real 3m32.617s, user 24m52.701s, sys 0m20.400s; cache on, second run: real 0m23.745s, user 2m43.566s, sys 0m4.532s. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/nvc0/nvc0_context.h | 1 + .../drivers/nouveau/nvc0/nvc0_program.c | 48 --- .../drivers/nouveau/nvc0/nvc0_shader_state.c | 3 +- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 2 + 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 8a2a8f2797e..4b83d1afeb4 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -321,6 +321,7 @@ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); /* nvc0_program.c */ bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset, +struct disk_cache *, struct pipe_debug_callback *); bool nvc0_program_upload(struct nvc0_context *, struct nvc0_program *); void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 1a5073292e8..912d2dece4f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -24,6 +24,7 @@ #include
"compiler/nir/nir.h" #include "tgsi/tgsi_ureg.h" +#include "util/blob.h" #include "nvc0/nvc0_context.h" @@ -568,11 +569,19 @@ nvc0_program_dump(struct nvc0_program *prog) bool nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, + struct disk_cache *disk_shader_cache, struct pipe_debug_callback *debug) { + struct blob blob; struct nv50_ir_prog_info *info; struct nv50_ir_prog_info_out info_out = {}; - int ret; + + void *cached_data = NULL; + size_t cached_size; + bool shader_found = false; + + int ret = 0; + cache_key key; info = CALLOC_STRUCT(nv50_ir_prog_info); if (!info) @@ -631,14 +640,39 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->assignSlots = nvc0_program_assign_varying_slots; /* these fields might be overwritten by the compiler */ - info_out.bin.smemSize = prog->cp.smem_size; - info_out.io.genUserClip = prog->vp.num_ucps; + info->bin.smemSize = prog->cp.smem_size; + info->io.genUserClip = prog->vp.num_ucps; + + blob_init(&blob); + + if (disk_shader_cache) { + nv50_ir_prog_info_serialize(&blob, info); + disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key); + cached_data = disk_cache_get(disk_shader_cache, key, &cached_size); + + if (cached_data && cached_size >= blob.size) { // blob.size is the size of serialized "info" + if (memcmp(cached_data, blob.data, blob.size) == 0) { +shader_found = true; +/* Blob contains only "info".
In disk cache, "info_out" comes right after it */ +size_t offset = blob.size; +nv50_ir_prog_info_out_deserialize(cached_data, cached_size, offset, &info_out); + } + } + free(cached_data); + } + if (!shader_found) { - ret = nv50_ir_generate_code(info, &info_out); - if (ret) { - NOUVEAU_ERR("shader translation failed: %i\n", ret); - goto out; + ret = nv50_ir_generate_code(info, &info_out); + if (ret) { + NOUVEAU_ERR("shader translation failed: %i\n", ret); + goto out; + } + if (disk_shader_cache) { + nv50_ir_prog_info_out_serialize(&blob, &info_out); + disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL); + } } + blob_finish(&blob); prog->code = info_out.bin.code; prog->code_size = info_out.bin.codeSize; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 774c5648113..4327a89454b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -54,7 +54,8 @@ nvc0_program_validate(struct
[Mesa-dev] [PATCH v2 5/7] nv50/ir: Add nv50_ir_prog_info serialize
Adds a function for serializing a nv50_ir_prog_info structure, which is needed for shader caching. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 4 + .../nouveau/codegen/nv50_ir_serialize.cpp | 81 +++ 2 files changed, 85 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 10ae5cbe420..3728470ab45 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -278,6 +278,10 @@ namespace nv50_ir extern void nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *); +/* Serialize a nv50_ir_prog_info structure and save it into blob */ +extern bool +nv50_ir_prog_info_serialize(struct blob *, struct nv50_ir_prog_info *); + /* Serialize a nv50_ir_prog_info_out structure and save it into blob */ extern bool nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp index 5671483bd4e..b640cb67503 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp @@ -17,6 +17,87 @@ enum FixupApplyFunc { FLIP_GM107 = 7 }; +extern bool +nv50_ir_prog_info_serialize(struct blob *blob, struct nv50_ir_prog_info *info) +{ + blob_write_uint16(blob, info->target); + blob_write_uint8(blob, info->type); + blob_write_uint8(blob, info->optLevel); + blob_write_uint8(blob, info->dbgFlags); + blob_write_uint8(blob, info->omitLineNum); + blob_write_uint32(blob, info->bin.smemSize); + blob_write_uint16(blob, info->bin.maxOutput); + blob_write_uint8(blob, info->bin.sourceRep); + + switch(info->bin.sourceRep) { + case PIPE_SHADER_IR_TGSI: { + struct tgsi_token *tokens = (struct tgsi_token *)info->bin.source; + unsigned int num_tokens = tgsi_num_tokens(tokens); + + blob_write_uint32(blob, 
num_tokens); + blob_write_bytes(blob, tokens, num_tokens * sizeof(struct tgsi_token)); + break; + } + case PIPE_SHADER_IR_NIR: { + struct nir_shader *nir = (struct nir_shader *)info->bin.source; + nir_serialize(blob, nir, false); + break; + } + default: + assert(!"unhandled info->bin.sourceRep"); + return false; + } + + blob_write_uint16(blob, info->immd.bufSize); + blob_write_bytes(blob, info->immd.buf, info->immd.bufSize * sizeof(*info->immd.buf)); + blob_write_uint16(blob, info->immd.count); + blob_write_bytes(blob, info->immd.data, info->immd.count * sizeof(*info->immd.data)); + blob_write_bytes(blob, info->immd.type, info->immd.count * 16); // for each vec4 (128 bit) + + switch (info->type) { + case PIPE_SHADER_VERTEX: + blob_write_bytes(blob, info->prop.vp.inputMask, + 4 * sizeof(*info->prop.vp.inputMask)); /* array of size 4 */ + break; + case PIPE_SHADER_TESS_CTRL: + blob_write_uint32(blob, info->prop.cp.inputOffset); + blob_write_uint32(blob, info->prop.cp.sharedOffset); + blob_write_uint32(blob, info->prop.cp.gridInfoBase); + blob_write_bytes(blob, info->prop.cp.numThreads, + 3 * sizeof(*info->prop.cp.numThreads)); /* array of size 3 */ + case PIPE_SHADER_GEOMETRY: + blob_write_uint8(blob, info->prop.gp.inputPrim); + break; + case PIPE_SHADER_FRAGMENT: + blob_write_uint8(blob, info->prop.fp.persampleInvocation); + break; + default: + break; + } + + blob_write_uint8(blob, info->io.auxCBSlot); + blob_write_uint16(blob, info->io.ucpBase); + blob_write_uint16(blob, info->io.drawInfoBase); + blob_write_uint16(blob, info->io.alphaRefBase); + blob_write_uint8(blob, info->io.pointSize); + blob_write_uint8(blob, info->io.viewportId); + blob_write_bytes(blob, info->io.backFaceColor, 2 * sizeof(*info->io.backFaceColor)); + blob_write_uint8(blob, info->io.mul_zero_wins); + blob_write_uint8(blob, info->io.nv50styleSurfaces); + blob_write_uint16(blob, info->io.texBindBase); + blob_write_uint16(blob, info->io.fbtexBindBase); + blob_write_uint16(blob, 
info->io.suInfoBase); + blob_write_uint16(blob, info->io.bindlessBase); + blob_write_uint16(blob, info->io.bufInfoBase); + blob_write_uint16(blob, info->io.sampleInfoBase); + blob_write_uint8(blob, info->io.msInfoCBSlot); + blob_write_uint16(blob, info->io.msInfoBase); + blob_write_uint16(blob, info->io.uboInfoBase); + blob_write_uint8(blob, info->io.genUserClip); + + return true; +} + extern bool nv50_ir_prog_info_out_serialize(struct blob *blo
[Mesa-dev] [PATCH v2 1/7] nv50/ir: add nv50_ir_prog_info_out
From: Karol Herbst Split out the output relevant fields from the nv50_ir_prog_info struct in order to have a cleaner separation between the input and output of the compilation. Signed-off-by: Karol Herbst --- .../drivers/nouveau/codegen/nv50_ir.cpp | 49 ++-- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 9 +- .../drivers/nouveau/codegen/nv50_ir_driver.h | 117 +--- .../nouveau/codegen/nv50_ir_from_common.cpp | 14 +- .../nouveau/codegen/nv50_ir_from_common.h | 3 +- .../nouveau/codegen/nv50_ir_from_nir.cpp | 204 +++--- .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 256 +- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 6 +- .../nouveau/codegen/nv50_ir_target.cpp| 2 +- .../drivers/nouveau/codegen/nv50_ir_target.h | 5 +- .../nouveau/codegen/nv50_ir_target_nv50.cpp | 17 +- .../nouveau/codegen/nv50_ir_target_nv50.h | 3 +- .../drivers/nouveau/nouveau_compiler.c| 9 +- .../drivers/nouveau/nv50/nv50_program.c | 61 +++-- .../drivers/nouveau/nvc0/nvc0_program.c | 89 +++--- 15 files changed, 450 insertions(+), 394 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index c65853578f6..c2c5956874a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1241,15 +1241,18 @@ void Program::releaseValue(Value *value) extern "C" { static void -nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) +nv50_ir_init_prog_info(struct nv50_ir_prog_info *info, + struct nv50_ir_prog_info_out *info_out) { + info_out->target = info->target; + info_out->type = info->type; if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) { - info->prop.tp.domain = PIPE_PRIM_MAX; - info->prop.tp.outputPrim = PIPE_PRIM_MAX; + info_out->prop.tp.domain = PIPE_PRIM_MAX; + info_out->prop.tp.outputPrim = PIPE_PRIM_MAX; } if (info->type == PIPE_SHADER_GEOMETRY) { - info->prop.gp.instanceCount = 1; - info->prop.gp.maxVertices = 1; + info_out->prop.gp.instanceCount 
= 1; + info_out->prop.gp.maxVertices = 1; } if (info->type == PIPE_SHADER_COMPUTE) { info->prop.cp.numThreads[0] = @@ -1257,23 +1260,26 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) info->prop.cp.numThreads[2] = 1; } info->io.pointSize = 0xff; - info->io.instanceId = 0xff; - info->io.vertexId = 0xff; - info->io.edgeFlagIn = 0xff; - info->io.edgeFlagOut = 0xff; - info->io.fragDepth = 0xff; - info->io.sampleMask = 0xff; + info_out->bin.smemSize = info->bin.smemSize; + info_out->io.genUserClip = info->io.genUserClip; + info_out->io.instanceId = 0xff; + info_out->io.vertexId = 0xff; + info_out->io.edgeFlagIn = 0xff; + info_out->io.edgeFlagOut = 0xff; + info_out->io.fragDepth = 0xff; + info_out->io.sampleMask = 0xff; info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff; } int -nv50_ir_generate_code(struct nv50_ir_prog_info *info) +nv50_ir_generate_code(struct nv50_ir_prog_info *info, + struct nv50_ir_prog_info_out *info_out) { int ret = 0; nv50_ir::Program::Type type; - nv50_ir_init_prog_info(info); + nv50_ir_init_prog_info(info, info_out); #define PROG_TYPE_CASE(a, b) \ case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break @@ -1301,15 +1307,16 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) return -1; } prog->driver = info; + prog->driver_out = info_out; prog->dbgFlags = info->dbgFlags; prog->optLevel = info->optLevel; switch (info->bin.sourceRep) { case PIPE_SHADER_IR_NIR: - ret = prog->makeFromNIR(info) ? 0 : -2; + ret = prog->makeFromNIR(info, info_out) ? 0 : -2; break; case PIPE_SHADER_IR_TGSI: - ret = prog->makeFromTGSI(info) ? 0 : -2; + ret = prog->makeFromTGSI(info, info_out) ? 
0 : -2; break; default: ret = -1; @@ -1320,7 +1327,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) prog->print(); - targ->parseDriverInfo(info); + targ->parseDriverInfo(info, info_out); prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); prog->convertToSSA(); @@ -1342,7 +1349,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) prog->optimizePostRA(info->optLevel); - if (!prog->emitBinary(info)) { + if (!prog->emitBinary(info_out)) { ret = -5; goto out; } @@ -1350,10 +1357,10 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) out: INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret); - info->bin.maxGPR = prog->maxGPR; - info->bin.code = prog->code; - info->bin.codeSize = prog->binSize; - info->bin.tlsSpace =
[Mesa-dev] [PATCH v2 3/7] nv50/ir: Add nv50_ir_prog_info_out serialize and deserialize
Adds functions for serializing and deserializing nv50_ir_prog_info_out structure, which are needed for shader caching. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 44 .../nouveau/codegen/nv50_ir_emit_gk110.cpp| 14 +- .../nouveau/codegen/nv50_ir_emit_gm107.cpp| 14 +- .../nouveau/codegen/nv50_ir_emit_nv50.cpp | 6 +- .../nouveau/codegen/nv50_ir_emit_nvc0.cpp | 14 +- .../nouveau/codegen/nv50_ir_serialize.cpp | 196 ++ src/gallium/drivers/nouveau/meson.build | 1 + 7 files changed, 265 insertions(+), 24 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index dab1ce030cb..eea32133ccf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -25,6 +25,7 @@ #include "pipe/p_shader_tokens.h" +#include "util/blob.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" @@ -242,6 +243,49 @@ nv50_ir_apply_fixups(void *fixupData, uint32_t *code, extern void nv50_ir_get_target_library(uint32_t chipset, const uint32_t **code, uint32_t *size); + +#ifdef __cplusplus +namespace nv50_ir +{ + class FixupEntry; + class FixupData; + + void + gk110_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gm107_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + nv50_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, +const nv50_ir::FixupData& data); + void + nvc0_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, +const nv50_ir::FixupData& data); + void + gk110_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gm107_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + 
nvc0_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + +} +#endif + +/* Serialize a nv50_ir_prog_info_out structure and save it into blob */ +extern bool +nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); + +/* Deserialize from data and save into a nv50_ir_prog_info_out structure + * using a pointer. Size is a total size of the serialized data. + * Offset points to where info_out in data is located. */ +extern bool +nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset, + struct nv50_ir_prog_info_out *); + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 2118c3153f7..e651d7fdcb0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1209,8 +1209,8 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i) } } -static void -selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) { int loc = entry->loc; if (data.force_persample_interp) @@ -1227,7 +1227,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i) code[1] |= 1 << 13; if (i->subOp == 1) { - addInterp(0, 0, selpFlip); + addInterp(0, 0, gk110_selpFlip); } } @@ -2042,8 +2042,8 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i) code[1] |= (i->ipa & 0xc) << (19 - 2); } -static void -interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const FixupData& data) { int ipa = entry->ipa; int reg = entry->reg; @@ -2078,10 +2078,10 @@ CodeEmitterGK110::emitINTERP(const Instruction *i) if (i->op == OP_PINTERP) { srcId(i->src(1), 23); - addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); + addInterp(i->ipa, SDATA(i->src(1)).id, 
gk110_interpApply); } else { code[0] |= 0xff << 23; - addInterp(i->ipa, 0xff, interpApply); + addInterp(i->ipa, 0xff, gk110_interpApply); } srcId(i->src(0).getIndirect(0), 10); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index e244bd0d610..4970f14cb33 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_e
[Mesa-dev] [PATCH 4/8] nv50/ir: Add prog_info_out print
Adds a function for printing nv50_ir_prog_info_out structure in JSON-like format, which could be used in debugging. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 3 + .../drivers/nouveau/codegen/nv50_ir_print.cpp | 155 ++ 2 files changed, 158 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index bc92a3bc4ee..9eb8a4c4798 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -275,6 +275,9 @@ namespace nv50_ir } #endif +extern void +nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *); + /* Serialize a nv50_ir_prog_info_out structure and save it into blob */ extern bool nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 5dcbf3c3e0c..f19d1a7d280 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -22,6 +22,7 @@ #include "codegen/nv50_ir.h" #include "codegen/nv50_ir_target.h" +#include "codegen/nv50_ir_driver.h" #include @@ -852,3 +853,157 @@ Function::printLiveIntervals() const } } // namespace nv50_ir + +extern void +nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *info_out) +{ + int i; + + INFO("{\n"); + INFO(" \"target\":\"%d\",\n", info_out->target); + INFO(" \"type\":\"%d\",\n", info_out->type); + + // Bin + INFO(" \"bin\":{\n"); + INFO(" \"maxGPR\":\"%d\",\n", info_out->bin.maxGPR); + INFO(" \"tlsSpace\":\"%d\",\n", info_out->bin.tlsSpace); + INFO(" \"smemSize\":\"%d\",\n", info_out->bin.smemSize); + INFO(" \"codeSize\":\"%d\",\n", info_out->bin.codeSize); + INFO(" \"instructions\":\"%d\",\n", info_out->bin.instructions); + + // RelocInfo + INFO(" \"RelocInfo\":"); + if (!info_out->bin.relocData) { + INFO("\"NULL\",\n"); + } 
+ else { + nv50_ir::RelocInfo *reloc = (nv50_ir::RelocInfo *)info_out->bin.relocData; + INFO("{\n"); + INFO(" \"codePos\":\"%d\",\n", reloc->codePos); + INFO(" \"libPos\":\"%d\",\n", reloc->libPos); + INFO(" \"dataPos\":\"%d\",\n", reloc->dataPos); + INFO(" \"count\":\"%d\",\n", reloc->count); + INFO(" \"RelocEntry\":[\n"); + for (unsigned int i = 0; i < reloc->count; i++) { + INFO(" {\"data\":\"%d\",\t\"mask\":\"%d\",\t\"offset\":\"%d\",\t\"bitPos\":\"%d\",\t\"type\":\"%d\"}", + reloc->entry[i].data, reloc->entry[i].mask, reloc->entry[i].offset, reloc->entry[i].bitPos, reloc->entry[i].type + ); + } + INFO("\n"); + INFO(" ]\n"); + INFO(" },\n"); + } + + // FixupInfo + INFO(" \"FixupInfo\":"); + if (!info_out->bin.fixupData) { + INFO("\"NULL\"\n"); + } + else { + nv50_ir::FixupInfo *fixup = (nv50_ir::FixupInfo *)info_out->bin.fixupData; + INFO("{\n"); + INFO(" \"count\":\"%d\"\n", fixup->count); + INFO(" \"FixupEntry\":[\n"); + for (unsigned int i = 0; i < fixup->count; i++) { + INFO(" {\"apply\":\"%p\",\t\"ipa\":\"%d\",\t\"reg\":\"%d\",\t\"loc\":\"%d\"}", + fixup->entry[i].apply, fixup->entry[i].ipa, fixup->entry[i].reg, fixup->entry[i].loc); + } + INFO("\n"); + INFO(" ]\n"); + INFO(" }\n"); + + INFO(" },\n"); + } + + if (info_out->numSysVals) { + INFO(" \"sv\":[\n"); + for (i = 0; i < info_out->numSysVals; i++) { + if (&(info_out->sv[i])) { +INFO(" {\"id\":\"%d\", \"sn\":\"%d\", \"si\":\"%d\"}", + info_out->sv[i].id, info_out->sv[i].sn, info
[Mesa-dev] [PATCH 8/8] nvc0: Add shader disk caching
Adds shader disk caching for nvc0 to reduce the need to compile shaders every time. Shaders are saved into disk_shader_cache from the nvc0_screen structure. It serializes the input nv50_ir_prog_info to compute the hash key and also to do a byte compare between the original nv50_ir_prog_info and the one saved in the cache. If the keys match and the byte compare also reports that they are equal, the shaders are the same, and the compiled nv50_ir_prog_info_out from the cache can be used instead of compiling the input info. Seems to be significantly improving loading times. Piglit tests seem to be OK. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/nvc0/nvc0_context.h | 1 + .../drivers/nouveau/nvc0/nvc0_program.c | 49 --- .../drivers/nouveau/nvc0/nvc0_shader_state.c | 3 +- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 2 + 4 files changed, 46 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 8a2a8f2797e..4b83d1afeb4 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -321,6 +321,7 @@ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); /* nvc0_program.c */ bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset, +struct disk_cache *, struct pipe_debug_callback *); bool nvc0_program_upload(struct nvc0_context *, struct nvc0_program *); void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 1a5073292e8..06b6f7b4db5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -24,6 +24,7 @@ #include "compiler/nir/nir.h" #include "tgsi/tgsi_ureg.h" +#include "util/blob.h" #include "nvc0/nvc0_context.h" @@ -568,11 +569,19 @@ nvc0_program_dump(struct nvc0_program *prog) bool nvc0_program_translate(struct 
nvc0_program *prog, uint16_t chipset, + struct disk_cache *disk_shader_cache, struct pipe_debug_callback *debug) { + struct blob blob; struct nv50_ir_prog_info *info; struct nv50_ir_prog_info_out info_out = {}; - int ret; + + void *cached_data = NULL; + size_t cached_size; + bool shader_found = false; + + int ret = 0; + cache_key key; info = CALLOC_STRUCT(nv50_ir_prog_info); if (!info) @@ -631,14 +640,38 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->assignSlots = nvc0_program_assign_varying_slots; /* these fields might be overwritten by the compiler */ - info_out.bin.smemSize = prog->cp.smem_size; - info_out.io.genUserClip = prog->vp.num_ucps; - - ret = nv50_ir_generate_code(info, _out); - if (ret) { - NOUVEAU_ERR("shader translation failed: %i\n", ret); - goto out; + info->bin.smemSize = prog->cp.smem_size; + info->io.genUserClip = prog->vp.num_ucps; + + blob_init(); + nv50_ir_prog_info_serialize(, info); + + if (disk_shader_cache) { + disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key); + cached_data = disk_cache_get(disk_shader_cache, key, _size); + + if (cached_data && cached_size >= blob.size) { // blob.size is the size of serialized "info" + if (memcmp(cached_data, blob.data, blob.size) == 0) { +shader_found = true; +/* Blob contains only "info". 
In disk cache, "info_out" comes right after it */ +size_t offset = blob.size; +nv50_ir_prog_info_out_deserialize(cached_data, cached_size, offset, _out); + } + } + free(cached_data); + } + if (!shader_found) { + ret = nv50_ir_generate_code(info, _out); + if (ret) { + NOUVEAU_ERR("shader translation failed: %i\n", ret); + goto out; + } + if (disk_shader_cache) { + nv50_ir_prog_info_out_serialize(, _out); + disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL); + } } + blob_finish(); prog->code = info_out.bin.code; prog->code_size = info_out.bin.codeSize; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 774c5648113..4327a89454b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -54,7 +54,8 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) if (!prog->translated) { prog->translated = nvc0_program_translate( - prog, nvc0->screen->base.device->chipset, >base.debug); + prog, nvc0->screen->ba
[Mesa-dev] [PATCH 3/8] nv50/ir: Add nv50_ir_prog_info_out serialize and deserialize
Adds functions for serializing and deserializing nv50_ir_prog_info_out structure, which are needed for shader caching. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 44 .../nouveau/codegen/nv50_ir_emit_gk110.cpp| 14 +- .../nouveau/codegen/nv50_ir_emit_gm107.cpp| 14 +- .../nouveau/codegen/nv50_ir_emit_nv50.cpp | 6 +- .../nouveau/codegen/nv50_ir_emit_nvc0.cpp | 14 +- .../nouveau/codegen/nv50_ir_serialize.cpp | 196 ++ src/gallium/drivers/nouveau/meson.build | 1 + 7 files changed, 265 insertions(+), 24 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index f6b5415bc95..bc92a3bc4ee 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -25,6 +25,7 @@ #include "pipe/p_shader_tokens.h" +#include "util/blob.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" @@ -242,6 +243,49 @@ nv50_ir_apply_fixups(void *fixupData, uint32_t *code, extern void nv50_ir_get_target_library(uint32_t chipset, const uint32_t **code, uint32_t *size); + +#ifdef __cplusplus +namespace nv50_ir +{ + class FixupEntry; + class FixupData; + + void + gk110_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gm107_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + nv50_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, +const nv50_ir::FixupData& data); + void + nvc0_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, +const nv50_ir::FixupData& data); + void + gk110_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gm107_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + 
nvc0_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + +} +#endif + +/* Serialize a nv50_ir_prog_info_out structure and save it into blob */ +extern bool +nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); + +/* Deserialize from data and save into a nv50_ir_prog_info_out structure + * using a pointer. Size is a total size of the serialized data. + * Offset points to where info_out in data is located. */ +extern bool +nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset, + struct nv50_ir_prog_info_out *); + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 2118c3153f7..e651d7fdcb0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1209,8 +1209,8 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i) } } -static void -selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) { int loc = entry->loc; if (data.force_persample_interp) @@ -1227,7 +1227,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i) code[1] |= 1 << 13; if (i->subOp == 1) { - addInterp(0, 0, selpFlip); + addInterp(0, 0, gk110_selpFlip); } } @@ -2042,8 +2042,8 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i) code[1] |= (i->ipa & 0xc) << (19 - 2); } -static void -interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const FixupData& data) { int ipa = entry->ipa; int reg = entry->reg; @@ -2078,10 +2078,10 @@ CodeEmitterGK110::emitINTERP(const Instruction *i) if (i->op == OP_PINTERP) { srcId(i->src(1), 23); - addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); + addInterp(i->ipa, SDATA(i->src(1)).id, 
gk110_interpApply); } else { code[0] |= 0xff << 23; - addInterp(i->ipa, 0xff, interpApply); + addInterp(i->ipa, 0xff, gk110_interpApply); } srcId(i->src(0).getIndirect(0), 10); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index e244bd0d610..4970f14cb33 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp ++
[Mesa-dev] [PATCH 2/8] util/blob: Add overwrite function for uint8
Overwrite function for this type was missing and I needed it for my project. Signed-off-by: Mark Menzynski --- src/util/blob.c | 9 + src/util/blob.h | 15 +++ 2 files changed, 24 insertions(+) diff --git a/src/util/blob.c b/src/util/blob.c index 94d5a9dea74..5bf4b924c91 100644 --- a/src/util/blob.c +++ b/src/util/blob.c @@ -214,6 +214,15 @@ BLOB_WRITE_TYPE(blob_write_intptr, intptr_t) #define ASSERT_ALIGNED(_offset, _align) \ assert(ALIGN((_offset), (_align)) == (_offset)) +bool +blob_overwrite_uint8 (struct blob *blob, + size_t offset, + uint8_t value) +{ + ASSERT_ALIGNED(offset, sizeof(value)); + return blob_overwrite_bytes(blob, offset, &value, sizeof(value)); +} + bool blob_overwrite_uint32 (struct blob *blob, size_t offset, diff --git a/src/util/blob.h b/src/util/blob.h index 9113331254a..d5496fef1cd 100644 --- a/src/util/blob.h +++ b/src/util/blob.h @@ -209,6 +209,21 @@ blob_write_uint16(struct blob *blob, uint16_t value); bool blob_write_uint32(struct blob *blob, uint32_t value); +/** + * Overwrite a uint8_t previously written to the blob. + * + * Writes a uint8_t value to an existing portion of the blob at an offset of + * \offset. This data range must have previously been written to the blob by + * one of the blob_write_* calls. + * + * \return True unless the requested position or position+to_write lie outside + * the current blob's size. + */ +bool +blob_overwrite_uint8(struct blob *blob, + size_t offset, + uint8_t value); + /** * Overwrite a uint32_t previously written to the blob. * -- 2.21.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/8] nv50/ir: add nv50_ir_prog_info_out
From: Karol Herbst Split out the output relevant fields from the nv50_ir_prog_info struct in order to have a cleaner separation between the input and output of the compilation. Signed-off-by: Karol Herbst --- .../drivers/nouveau/codegen/nv50_ir.cpp | 49 ++-- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 9 +- .../drivers/nouveau/codegen/nv50_ir_driver.h | 117 +--- .../nouveau/codegen/nv50_ir_from_common.cpp | 14 +- .../nouveau/codegen/nv50_ir_from_common.h | 3 +- .../nouveau/codegen/nv50_ir_from_nir.cpp | 202 +++--- .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 254 +- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 6 +- .../nouveau/codegen/nv50_ir_target.cpp| 2 +- .../drivers/nouveau/codegen/nv50_ir_target.h | 5 +- .../nouveau/codegen/nv50_ir_target_nv50.cpp | 17 +- .../nouveau/codegen/nv50_ir_target_nv50.h | 3 +- .../drivers/nouveau/nouveau_compiler.c| 9 +- .../drivers/nouveau/nv50/nv50_program.c | 61 ++--- .../drivers/nouveau/nvc0/nvc0_program.c | 89 +++--- 15 files changed, 448 insertions(+), 392 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index c65853578f6..c2c5956874a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1241,15 +1241,18 @@ void Program::releaseValue(Value *value) extern "C" { static void -nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) +nv50_ir_init_prog_info(struct nv50_ir_prog_info *info, + struct nv50_ir_prog_info_out *info_out) { + info_out->target = info->target; + info_out->type = info->type; if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) { - info->prop.tp.domain = PIPE_PRIM_MAX; - info->prop.tp.outputPrim = PIPE_PRIM_MAX; + info_out->prop.tp.domain = PIPE_PRIM_MAX; + info_out->prop.tp.outputPrim = PIPE_PRIM_MAX; } if (info->type == PIPE_SHADER_GEOMETRY) { - info->prop.gp.instanceCount = 1; - info->prop.gp.maxVertices = 1; + info_out->prop.gp.instanceCount 
= 1; + info_out->prop.gp.maxVertices = 1; } if (info->type == PIPE_SHADER_COMPUTE) { info->prop.cp.numThreads[0] = @@ -1257,23 +1260,26 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) info->prop.cp.numThreads[2] = 1; } info->io.pointSize = 0xff; - info->io.instanceId = 0xff; - info->io.vertexId = 0xff; - info->io.edgeFlagIn = 0xff; - info->io.edgeFlagOut = 0xff; - info->io.fragDepth = 0xff; - info->io.sampleMask = 0xff; + info_out->bin.smemSize = info->bin.smemSize; + info_out->io.genUserClip = info->io.genUserClip; + info_out->io.instanceId = 0xff; + info_out->io.vertexId = 0xff; + info_out->io.edgeFlagIn = 0xff; + info_out->io.edgeFlagOut = 0xff; + info_out->io.fragDepth = 0xff; + info_out->io.sampleMask = 0xff; info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff; } int -nv50_ir_generate_code(struct nv50_ir_prog_info *info) +nv50_ir_generate_code(struct nv50_ir_prog_info *info, + struct nv50_ir_prog_info_out *info_out) { int ret = 0; nv50_ir::Program::Type type; - nv50_ir_init_prog_info(info); + nv50_ir_init_prog_info(info, info_out); #define PROG_TYPE_CASE(a, b) \ case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break @@ -1301,15 +1307,16 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) return -1; } prog->driver = info; + prog->driver_out = info_out; prog->dbgFlags = info->dbgFlags; prog->optLevel = info->optLevel; switch (info->bin.sourceRep) { case PIPE_SHADER_IR_NIR: - ret = prog->makeFromNIR(info) ? 0 : -2; + ret = prog->makeFromNIR(info, info_out) ? 0 : -2; break; case PIPE_SHADER_IR_TGSI: - ret = prog->makeFromTGSI(info) ? 0 : -2; + ret = prog->makeFromTGSI(info, info_out) ? 
0 : -2; break; default: ret = -1; @@ -1320,7 +1327,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) prog->print(); - targ->parseDriverInfo(info); + targ->parseDriverInfo(info, info_out); prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); prog->convertToSSA(); @@ -1342,7 +1349,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) prog->optimizePostRA(info->optLevel); - if (!prog->emitBinary(info)) { + if (!prog->emitBinary(info_out)) { ret = -5; goto out; } @@ -1350,10 +1357,10 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) out: INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret); - info->bin.maxGPR = prog->maxGPR; - info->bin.code = prog->code; - info->bin.codeSize = prog->binSize; - info->bin.tlsSpace =
[Mesa-dev] [PATCH 7/8] nv50/ir: Move separateFragData
separateFragData was in the wrong place (nv50_ir_prog_info, the input structure); moved it to nv50_ir_prog_info_out. Signed-off-by: Mark Menzynski --- src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h | 2 +- src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 2 +- src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index cdf19eeabcf..30498ceffaf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -112,7 +112,6 @@ struct nv50_ir_prog_info uint8_t inputPrim; } gp; struct { - bool separateFragData; bool persampleInvocation; } fp; struct { @@ -200,6 +199,7 @@ struct nv50_ir_prog_info_out bool usesSampleMaskIn; bool readsFramebuffer; bool readsSampleLocations; + bool separateFragData; } fp; } prop; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 3efeaab4569..cf5f3d6d7e7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -2100,7 +2100,7 @@ Converter::visit(nir_intrinsic_instr *insn) atom->setIndirect(0, 0, address); atom->subOp = getSubOp(op); - info->io.globalAccess |= 0x2; + info_out->io.globalAccess |= 0x2; break; } case nir_intrinsic_bindless_image_atomic_add: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 5850dc18fec..c2322f3856a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1176,7 +1176,7 @@ void Source::scanProperty(const struct tgsi_full_property *prop) info_out->prop.gp.instanceCount = prop->u[0].Data; break; case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: - 
info->prop.fp.separateFragData = true; + info_out->prop.fp.separateFragData = true; break; case TGSI_PROPERTY_FS_COORD_ORIGIN: case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: -- 2.21.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/8] tgsi/util: Change boolean for bool
I was getting errors with "boolean" when compiling. This patch changes boolean to bool from <stdbool.h>. Signed-off-by: Mark Menzynski --- src/gallium/auxiliary/tgsi/tgsi_util.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_util.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 1e5582ba273..e1b604cff0e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -537,7 +537,7 @@ tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type tgsi_tex) } -boolean +bool tgsi_is_shadow_target(enum tgsi_texture_type target) { switch (target) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h index 686b90f467e..6dc576b1a00 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -28,6 +28,7 @@ #ifndef TGSI_UTIL_H #define TGSI_UTIL_H +#include <stdbool.h> #include "pipe/p_shader_tokens.h" #if defined __cplusplus @@ -84,11 +85,11 @@ tgsi_util_get_texture_coord_dim(enum tgsi_texture_type tgsi_tex); int tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type tgsi_tex); -boolean +bool tgsi_is_shadow_target(enum tgsi_texture_type target); -static inline boolean +static inline bool tgsi_is_msaa_target(enum tgsi_texture_type target) { return (target == TGSI_TEXTURE_2D_MSAA || -- 2.21.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/8] nv50/ir: Add nv50_ir_prog_info serialize
Adds a function for serializing a nv50_ir_prog_info structure, which is needed for shader caching. Signed-off-by: Mark Menzynski --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 4 + .../nouveau/codegen/nv50_ir_serialize.cpp | 81 +++ 2 files changed, 85 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 9eb8a4c4798..cdf19eeabcf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -278,6 +278,10 @@ namespace nv50_ir extern void nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *); +/* Serialize a nv50_ir_prog_info structure and save it into blob */ +extern bool +nv50_ir_prog_info_serialize(struct blob *, struct nv50_ir_prog_info *); + /* Serialize a nv50_ir_prog_info_out structure and save it into blob */ extern bool nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp index 077f3eba6c8..0f47189f10b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp @@ -17,6 +17,87 @@ enum InterpApply { FLIP_GM107 = 7 }; +extern bool +nv50_ir_prog_info_serialize(struct blob *blob, struct nv50_ir_prog_info *info) +{ + blob_write_uint16(blob, info->target); + blob_write_uint8(blob, info->type); + blob_write_uint8(blob, info->optLevel); + blob_write_uint8(blob, info->dbgFlags); + blob_write_uint8(blob, info->omitLineNum); + blob_write_uint32(blob, info->bin.smemSize); + blob_write_uint16(blob, info->bin.maxOutput); + blob_write_uint8(blob, info->bin.sourceRep); + + switch(info->bin.sourceRep) { + case PIPE_SHADER_IR_TGSI: { + struct tgsi_token *tokens = (struct tgsi_token *)info->bin.source; + unsigned int num_tokens = tgsi_num_tokens(tokens); + + blob_write_uint32(blob, 
num_tokens); + blob_write_bytes(blob, tokens, num_tokens * sizeof(struct tgsi_token)); + break; + } + case PIPE_SHADER_IR_NIR: { + struct nir_shader *nir = (struct nir_shader *)info->bin.source; + nir_serialize(blob, nir, false); + break; + } + default: + assert(!"unhandled info->bin.sourceRep"); + return false; + } + + blob_write_uint16(blob, info->immd.bufSize); + blob_write_bytes(blob, info->immd.buf, info->immd.bufSize * sizeof(*info->immd.buf)); + blob_write_uint16(blob, info->immd.count); + blob_write_bytes(blob, info->immd.data, info->immd.count * sizeof(*info->immd.data)); + blob_write_bytes(blob, info->immd.type, info->immd.count * 16); // for each vec4 (128 bit) + + switch (info->type) { + case PIPE_SHADER_VERTEX: + blob_write_bytes(blob, info->prop.vp.inputMask, + 4 * sizeof(*info->prop.vp.inputMask)); /* array of size 4 */ + break; + case PIPE_SHADER_TESS_CTRL: + blob_write_uint32(blob, info->prop.cp.inputOffset); + blob_write_uint32(blob, info->prop.cp.sharedOffset); + blob_write_uint32(blob, info->prop.cp.gridInfoBase); + blob_write_bytes(blob, info->prop.cp.numThreads, + 3 * sizeof(*info->prop.cp.numThreads)); /* array of size 3 */ + case PIPE_SHADER_GEOMETRY: + blob_write_uint8(blob, info->prop.gp.inputPrim); + break; + case PIPE_SHADER_FRAGMENT: + blob_write_uint8(blob, info->prop.fp.persampleInvocation); + break; + default: + break; + } + + blob_write_uint8(blob, info->io.auxCBSlot); + blob_write_uint16(blob, info->io.ucpBase); + blob_write_uint16(blob, info->io.drawInfoBase); + blob_write_uint16(blob, info->io.alphaRefBase); + blob_write_uint8(blob, info->io.pointSize); + blob_write_uint8(blob, info->io.viewportId); + blob_write_bytes(blob, info->io.backFaceColor, 2 * sizeof(*info->io.backFaceColor)); + blob_write_uint8(blob, info->io.mul_zero_wins); + blob_write_uint8(blob, info->io.nv50styleSurfaces); + blob_write_uint16(blob, info->io.texBindBase); + blob_write_uint16(blob, info->io.fbtexBindBase); + blob_write_uint16(blob, 
info->io.suInfoBase); + blob_write_uint16(blob, info->io.bindlessBase); + blob_write_uint16(blob, info->io.bufInfoBase); + blob_write_uint16(blob, info->io.sampleInfoBase); + blob_write_uint8(blob, info->io.msInfoCBSlot); + blob_write_uint16(blob, info->io.msInfoBase); + blob_write_uint16(blob, info->io.uboInfoBase); + blob_write_uint8(blob, info->io.genUserClip); + + return true; +} + extern bool nv50_ir_prog_info_out_serialize(struct blob
[Mesa-dev] [PATCH] nv50/ir: Add mul and mod constant optimizations
Optimizations for 0/n, 1/n and 0%n. No changes in shader db tests, because it is never used here, but it should come in handy. Signed-off-by: Mark Menzynski --- .../nouveau/codegen/nv50_ir_peephole.cpp | 30 +-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 0b3220903b9..12069e19808 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1177,10 +1177,28 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) break; case OP_DIV: - if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32)) + if (i->dType != TYPE_S32 && i->dType != TYPE_U32) break; + bld.setPosition(i, false); - if (imm0.reg.data.u32 == 0) { + if (s == 0) { + if (imm0.reg.data.u32 == 0) { +i->op = OP_MOV; +i->setSrc(1, NULL); + } + else if (imm0.reg.data.u32 == 1) { +Value *tA, *tB; +Instruction *slct; + +tA = bld.mkOp1v(OP_ABS, TYPE_U32, bld.getSSA(), i->getSrc(1)); +tB = bld.mkOp2v(OP_ADD, TYPE_S32, bld.getSSA(), tA, bld.loadImm(NULL, -1)); +slct = bld.mkCmp(OP_SLCT, CC_GT, i->dType, bld.getSSA(), TYPE_U32, bld.loadImm(NULL, 0), i->getSrc(1), tB); +i->def(0).replace(slct->getDef(0), false); + } + break; + } + + if (s != 1 || imm0.reg.data.u32 == 0) { break; } else if (imm0.reg.data.u32 == 1) { @@ -1259,6 +1277,14 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) break; case OP_MOD: + if (s == 0) { + if (imm0.reg.data.u32 == 0) { +i->op = OP_MOV; +i->setSrc(1, NULL); + } + break; + } + if (s == 1 && imm0.isPow2()) { bld.setPosition(i, false); if (i->sType == TYPE_U32) { -- 2.21.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] gm107/ir: Add stg, ldg instructions and function for checking offset length
Nvidia actively uses these instructions, maybe they are better in something. Long offset checking function was made because these functions only have 24 bit address offsets. v2: removed long offset function Signed-off-by: Mark Menzynski --- .../nouveau/codegen/nv50_ir_emit_gm107.cpp| 24 +++ 1 file changed, 24 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 6eefe8f0025..8da5adb94ce 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -174,9 +174,11 @@ private: void emitLDC(); void emitLDL(); void emitLDS(); + void emitLDG(); void emitLD(); void emitSTL(); void emitSTS(); + void emitSTG(); void emitST(); void emitALD(); void emitAST(); @@ -2414,6 +2416,17 @@ CodeEmitterGM107::emitLDS() emitGPR (0x00, insn->def(0)); } +void +CodeEmitterGM107::emitLDG() +{ + emitInsn (0xeed0); + emitLDSTs(0x30, insn->dType); + emitLDSTc(0x2e); + emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8); + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + void CodeEmitterGM107::emitLD() { @@ -2445,6 +2458,17 @@ CodeEmitterGM107::emitSTS() emitGPR (0x00, insn->src(1)); } +void +CodeEmitterGM107::emitSTG() +{ + emitInsn (0xeed8); + emitLDSTs(0x30, insn->dType); + emitLDSTc(0x2e); + emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8); + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); + emitGPR (0x00, insn->src(1)); +} + void CodeEmitterGM107::emitST() { -- 2.21.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gm107/ir: Add stg, ldg instructions and function for checking offset length
> > @@ -2414,6 +2428,17 @@ CodeEmitterGM107::emitLDS() > > emitGPR (0x00, insn->def(0)); > > } > > > > +void > > +CodeEmitterGM107::emitLDG() > > +{ > > + emitInsn (0xeed0); > > + emitLDSTs(0x30, insn->dType); > > + emitLDSTc(0x2e); > > + emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8); > > I didn't look, but we don't do something a bit more subtle on the > other ones, like checking if there's an indirect access in the first > place? With g[], it almost exclusively will be, but still... It's done same in the original store and load functions. > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gm107/ir: Add stg, ldg instructions and function for checking offset length
On Fri, Jul 19, 2019 at 5:04 PM Ilia Mirkin wrote: > > On Fri, Jul 19, 2019 at 10:57 AM Mark Menzynski wrote: > > > > Nvidia actively uses these instructions, maybe they are better in > > something. > > Long offset checking function was made because these functions only have 24 > > bit > > address offsets. > > > > Signed-off-by: Mark Menzynski > > --- > > .../nouveau/codegen/nv50_ir_emit_gm107.cpp| 36 +++ > > 1 file changed, 36 insertions(+) > > > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > > b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > > index 6eefe8f0025..c01a3017ba9 100644 > > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > > @@ -87,6 +87,7 @@ private: > > inline void emitADDR(int, int, int, int, const ValueRef &); > > inline void emitCBUF(int, int, int, int, int, const ValueRef &); > > inline bool longIMMD(const ValueRef &); > > + inline bool longOffset(const ValueRef &); > > inline void emitIMMD(int, int, const ValueRef &); > > > > void emitCond3(int, CondCode); > > @@ -174,9 +175,11 @@ private: > > void emitLDC(); > > void emitLDL(); > > void emitLDS(); > > + void emitLDG(); > > void emitLD(); > > void emitSTL(); > > void emitSTS(); > > + void emitSTG(); > > void emitST(); > > void emitALD(); > > void emitAST(); > > @@ -333,6 +336,17 @@ CodeEmitterGM107::longIMMD(const ValueRef ) > > return false; > > } > > > > +bool > > +CodeEmitterGM107::longOffset(const ValueRef ) > > +{ > > + // TODO: check for other files as well? > > + if (ref.getFile() != FILE_MEMORY_GLOBAL) > > + return false; > > I haven't seen the uses (best to send stuff like this as a series), > but you're saying that if it's not global memory, then it's not a long > offset? I suspect in the caller it should be more like > > assert(file == global || !long offset) or something. > This is how I used it for Load. Store was used the same way. 
I have not sent it because we didn't found any noticeable changes with that: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index c01a3017ba9..f632178138b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -3603,7 +3603,12 @@ CodeEmitterGM107::emitInstruction(Instruction *i) case FILE_MEMORY_CONST : emitLDC(); break; case FILE_MEMORY_LOCAL : emitLDL(); break; case FILE_MEMORY_SHARED: emitLDS(); break; - case FILE_MEMORY_GLOBAL: emitLD(); break; + case FILE_MEMORY_GLOBAL: + if (longOffset(insn->src(0))) +emitLD(); + else +emitLDG(); + break; default: assert(!"invalid load"); emitNOP(); > > + > > + int32_t offset = ref.get()->reg.data.offset; > > + return offset > 0x7f || offset < -0x80; > > You have two spaces after the >. Remove one of them. > > > +} > > + > > void > > CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef ) > > { > > @@ -2414,6 +2428,17 @@ CodeEmitterGM107::emitLDS() > > emitGPR (0x00, insn->def(0)); > > } > > > > +void > > +CodeEmitterGM107::emitLDG() > > +{ > > + emitInsn (0xeed0); > > + emitLDSTs(0x30, insn->dType); > > + emitLDSTc(0x2e); > > + emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8); > > I didn't look, but we don't do something a bit more subtle on the > other ones, like checking if there's an indirect access in the first > place? With g[], it almost exclusively will be, but still... 
> > > + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); > > + emitGPR (0x00, insn->def(0)); > > +} > > + > > void > > CodeEmitterGM107::emitLD() > > { > > @@ -2445,6 +2470,17 @@ CodeEmitterGM107::emitSTS() > > emitGPR (0x00, insn->src(1)); > > } > > > > +void > > +CodeEmitterGM107::emitSTG() > > +{ > > + emitInsn (0xeed8); > > + emitLDSTs(0x30, insn->dType); > > + emitLDSTc(0x2e); > > + emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8); > > + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); > > + emitGPR (0x00, insn->src(1)); > > +} > > + > > void > > CodeEmitterGM107::emitST() > > { > > -- > > 2.21.0 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] gm107/ir: Add stg, ldg instructions and function for checking offset length
Nvidia actively uses these instructions, so they may be better in some way. A long-offset checking function was added because these instructions only have 24-bit address offsets. Signed-off-by: Mark Menzynski --- .../nouveau/codegen/nv50_ir_emit_gm107.cpp| 36 +++ 1 file changed, 36 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 6eefe8f0025..c01a3017ba9 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -87,6 +87,7 @@ private: inline void emitADDR(int, int, int, int, const ValueRef &); inline void emitCBUF(int, int, int, int, int, const ValueRef &); inline bool longIMMD(const ValueRef &); + inline bool longOffset(const ValueRef &); inline void emitIMMD(int, int, const ValueRef &); void emitCond3(int, CondCode); @@ -174,9 +175,11 @@ private: void emitLDC(); void emitLDL(); void emitLDS(); + void emitLDG(); void emitLD(); void emitSTL(); void emitSTS(); + void emitSTG(); void emitST(); void emitALD(); void emitAST(); @@ -333,6 +336,17 @@ CodeEmitterGM107::longIMMD(const ValueRef ) return false; } +bool +CodeEmitterGM107::longOffset(const ValueRef ) +{ + // TODO: check for other files as well? 
+ if (ref.getFile() != FILE_MEMORY_GLOBAL) + return false; + + int32_t offset = ref.get()->reg.data.offset; + return offset > 0x7f || offset < -0x80; +} + void CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef ) { @@ -2414,6 +2428,17 @@ CodeEmitterGM107::emitLDS() emitGPR (0x00, insn->def(0)); } +void +CodeEmitterGM107::emitLDG() +{ + emitInsn (0xeed0); + emitLDSTs(0x30, insn->dType); + emitLDSTc(0x2e); + emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8); + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + void CodeEmitterGM107::emitLD() { @@ -2445,6 +2470,17 @@ CodeEmitterGM107::emitSTS() emitGPR (0x00, insn->src(1)); } +void +CodeEmitterGM107::emitSTG() +{ + emitInsn (0xeed8); + emitLDSTs(0x30, insn->dType); + emitLDSTc(0x2e); + emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8); + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); + emitGPR (0x00, insn->src(1)); +} + void CodeEmitterGM107::emitST() { -- 2.21.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nvc0/ir: Fix assert accessing null pointer
Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=111007 Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=67 Signed-off-by: Mark Menzynski --- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index aca3b0afb1e..1f702a987d8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -51,12 +51,12 @@ NVC0LegalizeSSA::handleDIV(Instruction *i) // Generate movs to the input regs for the call we want to generate for (int s = 0; i->srcExists(s); ++s) { Instruction *ld = i->getSrc(s)->getInsn(); - assert(ld->getSrc(0) != NULL); // check if we are moving an immediate, propagate it in that case if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV) || !(ld->src(0).getFile() == FILE_IMMEDIATE)) bld.mkMovToReg(s, i->getSrc(s)); else { + assert(ld->getSrc(0) != NULL); bld.mkMovToReg(s, ld->getSrc(0)); // Clear the src, to make code elimination possible here before we // delete the instruction i later -- 2.21.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev