[Mesa-dev] [PATCH v3 3/8] nv50/ir: Add nv50_ir_prog_info_out serialize and deserialize

2020-03-27 Thread Mark Menzynski
Adds functions for serializing and deserializing the
nv50_ir_prog_info_out structure, which are needed for shader caching.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |  44 +
 .../nouveau/codegen/nv50_ir_emit_gk110.cpp|  14 +-
 .../nouveau/codegen/nv50_ir_emit_gm107.cpp|  14 +-
 .../nouveau/codegen/nv50_ir_emit_nv50.cpp |   6 +-
 .../nouveau/codegen/nv50_ir_emit_nvc0.cpp |  14 +-
 .../nouveau/codegen/nv50_ir_serialize.cpp | 185 ++
 src/gallium/drivers/nouveau/meson.build   |   1 +
 7 files changed, 254 insertions(+), 24 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index dab1ce030cb..591aa8f57e8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -25,6 +25,7 @@
 
 #include "pipe/p_shader_tokens.h"
 
+#include "util/blob.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
@@ -242,6 +243,49 @@ nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
 extern void nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size);
 
+
+#ifdef __cplusplus
+namespace nv50_ir
+{
+   class FixupEntry;
+   class FixupData;
+
+   void
+   gk110_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+ const nv50_ir::FixupData& data);
+   void
+   gm107_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+ const nv50_ir::FixupData& data);
+   void
+   nv50_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+const nv50_ir::FixupData& data);
+   void
+   nvc0_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+const nv50_ir::FixupData& data);
+   void
+   gk110_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
+  const nv50_ir::FixupData& data);
+   void
+   gm107_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
+  const nv50_ir::FixupData& data);
+   void
+   nvc0_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
+ const nv50_ir::FixupData& data);
+
+}
+#endif
+
+/* Serialize a nv50_ir_prog_info_out structure and save it into blob */
+extern bool MUST_CHECK
+nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *);
+
+/* Deserialize from data and save into a nv50_ir_prog_info_out structure
+ * using a pointer. Size is a total size of the serialized data.
+ * Offset points to where info_out in data is located. */
+extern bool MUST_CHECK
+nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset,
+  struct nv50_ir_prog_info_out *);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 2118c3153f7..e651d7fdcb0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1209,8 +1209,8 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
}
 }
 
-static void
-selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+void
+gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
 {
int loc = entry->loc;
if (data.force_persample_interp)
@@ -1227,7 +1227,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i)
   code[1] |= 1 << 13;
 
if (i->subOp == 1) {
-  addInterp(0, 0, selpFlip);
+  addInterp(0, 0, gk110_selpFlip);
}
 }
 
@@ -2042,8 +2042,8 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i)
code[1] |= (i->ipa & 0xc) << (19 - 2);
 }
 
-static void
-interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+void
+gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const 
FixupData& data)
 {
int ipa = entry->ipa;
int reg = entry->reg;
@@ -2078,10 +2078,10 @@ CodeEmitterGK110::emitINTERP(const Instruction *i)
 
if (i->op == OP_PINTERP) {
   srcId(i->src(1), 23);
-  addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
+  addInterp(i->ipa, SDATA(i->src(1)).id, gk110_interpApply);
} else {
   code[0] |= 0xff << 23;
-  addInterp(i->ipa, 0xff, interpApply);
+  addInterp(i->ipa, 0xff, gk110_interpApply);
}
 
srcId(i->src(0).getIndirect(0), 10);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index e244bd0d610..4970f14cb33 100644
--- a/src/gallium/drivers/nouveau/code

[Mesa-dev] [PATCH v3 8/8] nv50: Add shader disk caching

2020-03-27 Thread Mark Menzynski
Adds shader disk caching for nv50 to reduce the need to compile shaders every
time. Shaders are saved into the disk_shader_cache from the nv50_screen
structure.

It serializes the input nv50_ir_prog_info to compute the hash key and
also to do a byte compare between the original nv50_ir_prog_info and the one
saved in the cache. If the keys match and the byte compare also reports that
they are equal, the shaders are the same, and the compiled
nv50_ir_prog_info_out from the cache can be used instead of compiling the
input info.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/nv50/nv50_program.c   | 283 +++---
 .../drivers/nouveau/nv50/nv50_program.h   |   2 +
 .../drivers/nouveau/nv50/nv50_shader_state.c  |   4 +-
 src/gallium/drivers/nouveau/nv50/nv50_state.c |   1 +
 4 files changed, 54 insertions(+), 236 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c 
b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index f4f2e951fd5..a03d5b9f6d0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -37,108 +37,6 @@ bitcount4(const uint32_t val)
return cnt[val & 0xf];
 }
 
-static int
-nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out *info)
-{
-   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
-   unsigned i, n, c;
-
-   n = 0;
-   for (i = 0; i < info->numInputs; ++i) {
-  prog->in[i].id = i;
-  prog->in[i].sn = info->in[i].sn;
-  prog->in[i].si = info->in[i].si;
-  prog->in[i].hw = n;
-  prog->in[i].mask = info->in[i].mask;
-
-  prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
-
-  for (c = 0; c < 4; ++c)
- if (info->in[i].mask & (1 << c))
-info->in[i].slot[c] = n++;
-
-  if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
- prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
-   }
-   prog->in_nr = info->numInputs;
-
-   for (i = 0; i < info->numSysVals; ++i) {
-  switch (info->sv[i].sn) {
-  case TGSI_SEMANTIC_INSTANCEID:
- prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
- continue;
-  case TGSI_SEMANTIC_VERTEXID:
- prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
- prog->vp.attrs[2] |= 
NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
- continue;
-  default:
- break;
-  }
-   }
-
-   /*
-* Corner case: VP has no inputs, but we will still need to submit data to
-* draw it. HW will shout at us and won't draw anything if we don't enable
-* any input, so let's just pretend it's the first one.
-*/
-   if (prog->vp.attrs[0] == 0 &&
-   prog->vp.attrs[1] == 0 &&
-   prog->vp.attrs[2] == 0)
-  prog->vp.attrs[0] |= 0xf;
-
-   /* VertexID before InstanceID */
-   if (info->io.vertexId < info->numSysVals)
-  info->sv[info->io.vertexId].slot[0] = n++;
-   if (info->io.instanceId < info->numSysVals)
-  info->sv[info->io.instanceId].slot[0] = n++;
-
-   n = 0;
-   for (i = 0; i < info->numOutputs; ++i) {
-  switch (info->out[i].sn) {
-  case TGSI_SEMANTIC_PSIZE:
- prog->vp.psiz = i;
- break;
-  case TGSI_SEMANTIC_CLIPDIST:
- prog->vp.clpd[info->out[i].si] = n;
- break;
-  case TGSI_SEMANTIC_EDGEFLAG:
- prog->vp.edgeflag = i;
- break;
-  case TGSI_SEMANTIC_BCOLOR:
- prog->vp.bfc[info->out[i].si] = i;
- break;
-  case TGSI_SEMANTIC_LAYER:
- prog->gp.has_layer = true;
- prog->gp.layerid = n;
- break;
-  case TGSI_SEMANTIC_VIEWPORT_INDEX:
- prog->gp.has_viewport = true;
- prog->gp.viewportid = n;
- break;
-  default:
- break;
-  }
-  prog->out[i].id = i;
-  prog->out[i].sn = info->out[i].sn;
-  prog->out[i].si = info->out[i].si;
-  prog->out[i].hw = n;
-  prog->out[i].mask = info->out[i].mask;
-
-  for (c = 0; c < 4; ++c)
- if (info->out[i].mask & (1 << c))
-info->out[i].slot[c] = n++;
-   }
-   prog->out_nr = info->numOutputs;
-   prog->max_out = n;
-   if (!prog->max_out)
-  prog->max_out = 1;
-
-   if (prog->vp.psiz < info->numOutputs)
-  prog->vp.psiz = prog->out[prog->vp.psiz].hw;
-
-   return 0;
-}
-
 static int
 nv50_vertprog_assign_slots_info(struct nv50_ir_prog_info_out *info)
 {
@@ -263,115 +161,6 @@ nv50_vertprog_assign_slots_prog(struct 
nv50_ir_prog_info_out *info)
return 0;
 }
 
-static int
-nv50_fragprog_assign_slots(struct nv50_ir_prog_info_out *info)
-{
-   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
-   unsigned i, n, m, c;
-   unsigned nvary;
-   unsi

[Mesa-dev] [PATCH v3 7/8] nv50: Add separate functions for varying bits

2020-03-27 Thread Mark Menzynski
This separation will be needed for shader disk caching. The reason for it
is that when shaders are loaded from the cache, the data in the info structure
is already loaded. That means the varying bits for info are needed only when
compiling shaders and not when loading from the cache. The varying bits for
prog are needed in both cases.

Unfortunately, I don't know how most of the code works; I have separated
this manually, only by looking at the original code. That means that this
patch is experimental. Together with the following commit it works
(there seem to be no regressions at all in VK-GL-CTS
[openglcts/data/mustpass/gl/khronos_mustpass/4.6.1.x/gl33-master.txt]
and all benchmarks behaved normally). Unfortunately, I cannot test in
Piglit because of technical problems, so there might still be some
work needed.
I am mainly asking for help with the function names,
looking for bugs, and pointing out useless code. I will be glad for every
review.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/nv50/nv50_program.c   | 344 ++
 1 file changed, 344 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c 
b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index e36b8a0f8cc..f4f2e951fd5 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -139,6 +139,130 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out 
*info)
return 0;
 }
 
+static int
+nv50_vertprog_assign_slots_info(struct nv50_ir_prog_info_out *info)
+{
+   unsigned i, n, c;
+
+   n = 0;
+   for (i = 0; i < info->numInputs; ++i) {
+   for (c = 0; c < 4; ++c)
+ if (info->in[i].mask & (1 << c))
+info->in[i].slot[c] = n++;
+   }
+
+   /* VertexID before InstanceID */
+   if (info->io.vertexId < info->numSysVals)
+  info->sv[info->io.vertexId].slot[0] = n++;
+   if (info->io.instanceId < info->numSysVals)
+  info->sv[info->io.instanceId].slot[0] = n++;
+
+   n = 0;
+   for (i = 0; i < info->numOutputs; ++i) {
+  for (c = 0; c < 4; ++c)
+ if (info->out[i].mask & (1 << c))
+info->out[i].slot[c] = n++;
+   }
+
+   return 0;
+}
+
+static int
+nv50_vertprog_assign_slots_prog(struct nv50_ir_prog_info_out *info)
+{
+   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
+   unsigned i, n, c;
+
+   n = 0;
+   for (i = 0; i < info->numInputs; ++i) {
+  prog->in[i].id = i;
+  prog->in[i].sn = info->in[i].sn;
+  prog->in[i].si = info->in[i].si;
+  prog->in[i].hw = n;
+  prog->in[i].mask = info->in[i].mask;
+
+  prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
+
+  for (c = 0; c < 4; ++c)
+ if (info->in[i].mask & (1 << c))
+n++;
+
+  if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
+   }
+   prog->in_nr = info->numInputs;
+
+   for (i = 0; i < info->numSysVals; ++i) {
+  switch (info->sv[i].sn) {
+  case TGSI_SEMANTIC_INSTANCEID:
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
+ continue;
+  case TGSI_SEMANTIC_VERTEXID:
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
+ prog->vp.attrs[2] |= 
NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
+ continue;
+  default:
+ break;
+  }
+   }
+
+   /*
+* Corner case: VP has no inputs, but we will still need to submit data to
+* draw it. HW will shout at us and won't draw anything if we don't enable
+* any input, so let's just pretend it's the first one.
+*/
+   if (prog->vp.attrs[0] == 0 &&
+   prog->vp.attrs[1] == 0 &&
+   prog->vp.attrs[2] == 0)
+  prog->vp.attrs[0] |= 0xf;
+
+   n = 0;
+   for (i = 0; i < info->numOutputs; ++i) {
+  switch (info->out[i].sn) {
+  case TGSI_SEMANTIC_PSIZE:
+ prog->vp.psiz = i;
+ break;
+  case TGSI_SEMANTIC_CLIPDIST:
+ prog->vp.clpd[info->out[i].si] = n;
+ break;
+  case TGSI_SEMANTIC_EDGEFLAG:
+ prog->vp.edgeflag = i;
+ break;
+  case TGSI_SEMANTIC_BCOLOR:
+ prog->vp.bfc[info->out[i].si] = i;
+ break;
+  case TGSI_SEMANTIC_LAYER:
+ prog->gp.has_layer = true;
+ prog->gp.layerid = n;
+ break;
+  case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ prog->gp.has_viewport = true;
+ prog->gp.viewportid = n;
+ break;
+  default:
+ break;
+  }
+  prog->out[i].id = i;
+  prog->out[i].sn = info->out[i].sn;
+  prog->out[i].si = info->out[i].si;
+  prog->out[i].hw = n;
+  prog->out[i].mask = info->out[i].mask;
+
+  for (c = 0; c < 4; ++c)
+  

[Mesa-dev] [PATCH v3 2/8] nv50/ir: add nv50_ir_prog_info_out

2020-03-27 Thread Mark Menzynski
From: Karol Herbst 

Split out the output relevant fields from the nv50_ir_prog_info struct
in order to have a cleaner separation between the input and output of
the compilation.

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir.cpp   |  49 ++--
 src/gallium/drivers/nouveau/codegen/nv50_ir.h |   9 +-
 .../drivers/nouveau/codegen/nv50_ir_driver.h  | 117 +---
 .../nouveau/codegen/nv50_ir_from_common.cpp   |  14 +-
 .../nouveau/codegen/nv50_ir_from_common.h |   3 +-
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 204 +++---
 .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 256 +-
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp |   6 +-
 .../nouveau/codegen/nv50_ir_target.cpp|   2 +-
 .../drivers/nouveau/codegen/nv50_ir_target.h  |   5 +-
 .../nouveau/codegen/nv50_ir_target_nv50.cpp   |  17 +-
 .../nouveau/codegen/nv50_ir_target_nv50.h |   3 +-
 .../drivers/nouveau/nouveau_compiler.c|   9 +-
 .../drivers/nouveau/nv50/nv50_program.c   |  61 +++--
 .../drivers/nouveau/nvc0/nvc0_program.c   |  87 +++---
 15 files changed, 448 insertions(+), 394 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index c65853578f6..c2c5956874a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1241,15 +1241,18 @@ void Program::releaseValue(Value *value)
 extern "C" {
 
 static void
-nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
+nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,
+   struct nv50_ir_prog_info_out *info_out)
 {
+   info_out->target = info->target;
+   info_out->type = info->type;
if (info->type == PIPE_SHADER_TESS_CTRL || info->type == 
PIPE_SHADER_TESS_EVAL) {
-  info->prop.tp.domain = PIPE_PRIM_MAX;
-  info->prop.tp.outputPrim = PIPE_PRIM_MAX;
+  info_out->prop.tp.domain = PIPE_PRIM_MAX;
+  info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;
}
if (info->type == PIPE_SHADER_GEOMETRY) {
-  info->prop.gp.instanceCount = 1;
-  info->prop.gp.maxVertices = 1;
+  info_out->prop.gp.instanceCount = 1;
+  info_out->prop.gp.maxVertices = 1;
}
if (info->type == PIPE_SHADER_COMPUTE) {
   info->prop.cp.numThreads[0] =
@@ -1257,23 +1260,26 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
   info->prop.cp.numThreads[2] = 1;
}
info->io.pointSize = 0xff;
-   info->io.instanceId = 0xff;
-   info->io.vertexId = 0xff;
-   info->io.edgeFlagIn = 0xff;
-   info->io.edgeFlagOut = 0xff;
-   info->io.fragDepth = 0xff;
-   info->io.sampleMask = 0xff;
+   info_out->bin.smemSize = info->bin.smemSize;
+   info_out->io.genUserClip = info->io.genUserClip;
+   info_out->io.instanceId = 0xff;
+   info_out->io.vertexId = 0xff;
+   info_out->io.edgeFlagIn = 0xff;
+   info_out->io.edgeFlagOut = 0xff;
+   info_out->io.fragDepth = 0xff;
+   info_out->io.sampleMask = 0xff;
info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
 }
 
 int
-nv50_ir_generate_code(struct nv50_ir_prog_info *info)
+nv50_ir_generate_code(struct nv50_ir_prog_info *info,
+  struct nv50_ir_prog_info_out *info_out)
 {
int ret = 0;
 
nv50_ir::Program::Type type;
 
-   nv50_ir_init_prog_info(info);
+   nv50_ir_init_prog_info(info, info_out);
 
 #define PROG_TYPE_CASE(a, b)  \
case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
@@ -1301,15 +1307,16 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
   return -1;
}
prog->driver = info;
+   prog->driver_out = info_out;
prog->dbgFlags = info->dbgFlags;
prog->optLevel = info->optLevel;
 
switch (info->bin.sourceRep) {
case PIPE_SHADER_IR_NIR:
-  ret = prog->makeFromNIR(info) ? 0 : -2;
+  ret = prog->makeFromNIR(info, info_out) ? 0 : -2;
   break;
case PIPE_SHADER_IR_TGSI:
-  ret = prog->makeFromTGSI(info) ? 0 : -2;
+  ret = prog->makeFromTGSI(info, info_out) ? 0 : -2;
   break;
default:
   ret = -1;
@@ -1320,7 +1327,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
   prog->print();
 
-   targ->parseDriverInfo(info);
+   targ->parseDriverInfo(info, info_out);
prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
 
prog->convertToSSA();
@@ -1342,7 +1349,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
 
prog->optimizePostRA(info->optLevel);
 
-   if (!prog->emitBinary(info)) {
+   if (!prog->emitBinary(info_out)) {
   ret = -5;
   goto out;
}
@@ -1350,10 +1357,10 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
 out:
INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
 
-   info->bin.maxGPR = prog->maxGPR;
-   info->bin.code = prog->code;
-   info->bin.codeSize = prog->binSize;
-   info->bin.tlsSpace = 

[Mesa-dev] [PATCH v3 5/8] nv50/ir: Add nv50_ir_prog_info serialize

2020-03-27 Thread Mark Menzynski
Adds a function for serializing a nv50_ir_prog_info structure, which is
needed for shader caching.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |  4 ++
 .../nouveau/codegen/nv50_ir_serialize.cpp | 42 +++
 2 files changed, 46 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 1bfaa8483ca..d33c6b6b83c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -278,6 +278,10 @@ namespace nv50_ir
 extern void
 nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *);
 
+/* Serialize a nv50_ir_prog_info structure and save it into blob */
+extern bool
+nv50_ir_prog_info_serialize(struct blob *, struct nv50_ir_prog_info *);
+
 /* Serialize a nv50_ir_prog_info_out structure and save it into blob */
 extern bool MUST_CHECK
 nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
index 52ceb66947f..e9d1d0b3215 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
@@ -16,6 +16,48 @@ enum FixupApplyFunc {
FLIP_GM107
 };
 
+extern bool
+nv50_ir_prog_info_serialize(struct blob *blob, struct nv50_ir_prog_info *info)
+{
+   blob_write_uint16(blob, info->target);
+   blob_write_uint8(blob, info->type);
+   blob_write_uint8(blob, info->optLevel);
+   blob_write_uint8(blob, info->dbgFlags);
+   blob_write_uint8(blob, info->omitLineNum);
+   blob_write_uint32(blob, info->bin.smemSize);
+   blob_write_uint16(blob, info->bin.maxOutput);
+   blob_write_uint8(blob, info->bin.sourceRep);
+
+   switch(info->bin.sourceRep) {
+  case PIPE_SHADER_IR_TGSI: {
+ struct tgsi_token *tokens = (struct tgsi_token *)info->bin.source;
+ unsigned int num_tokens = tgsi_num_tokens(tokens);
+
+ blob_write_uint32(blob, num_tokens);
+ blob_write_bytes(blob, tokens, num_tokens * sizeof(struct 
tgsi_token));
+ break;
+  }
+  case PIPE_SHADER_IR_NIR: {
+ struct nir_shader *nir = (struct nir_shader *)info->bin.source;
+ nir_serialize(blob, nir, false);
+ break;
+  }
+  default:
+ assert(!"unhandled info->bin.sourceRep switch case");
+ return false;
+   }
+
+   blob_write_uint16(blob, info->immd.bufSize);
+   blob_write_bytes(blob, info->immd.buf, info->immd.bufSize * 
sizeof(*info->immd.buf));
+   blob_write_uint16(blob, info->immd.count);
+   blob_write_bytes(blob, info->immd.data, info->immd.count * 
sizeof(*info->immd.data));
+   blob_write_bytes(blob, info->immd.type, info->immd.count * 16); // for each 
vec4 (128 bit)
+   blob_write_bytes(blob, &info->prop, sizeof(info->prop));
+   blob_write_bytes(blob, &info->io, sizeof(info->io));
+
+   return true;
+}
+
 extern bool
 nv50_ir_prog_info_out_serialize(struct blob *blob,
 struct nv50_ir_prog_info_out *info_out)
-- 
2.24.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 6/8] nvc0: Add shader disk caching

2020-03-27 Thread Mark Menzynski
Adds shader disk caching for nvc0 to reduce the need to compile shaders every
time. Shaders are saved into the disk_shader_cache from the nvc0_screen
structure.

It serializes the input nv50_ir_prog_info to compute the hash key and
also to do a byte compare between the original nv50_ir_prog_info and the one
saved in the cache. If the keys match and the byte compare also reports that
they are equal, the shaders are the same, and the compiled
nv50_ir_prog_info_out from the cache can be used instead of compiling the
input info.

This seems to significantly improve loading times. These are the results
from running a bunch of shaders:
cache off
real    2m58.574s
user    21m34.018s
sys     0m8.055s

cache on, first run
real    3m32.617s
user    24m52.701s
sys     0m20.400s

cache on, second run
real    0m23.745s
user    2m43.566s
sys     0m4.532s

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/nvc0/nvc0_context.h   |  1 +
 .../drivers/nouveau/nvc0/nvc0_program.c   | 51 +--
 .../drivers/nouveau/nvc0/nvc0_shader_state.c  |  3 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c |  2 +
 4 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 8a2a8f2797e..4b83d1afeb4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -321,6 +321,7 @@ extern struct draw_stage *nvc0_draw_render_stage(struct 
nvc0_context *);
 
 /* nvc0_program.c */
 bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset,
+struct disk_cache *,
 struct pipe_debug_callback *);
 bool nvc0_program_upload(struct nvc0_context *, struct nvc0_program *);
 void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 7f32dc941d6..50430931194 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -24,6 +24,7 @@
 
 #include "compiler/nir/nir.h"
 #include "tgsi/tgsi_ureg.h"
+#include "util/blob.h"
 
 #include "nvc0/nvc0_context.h"
 
@@ -568,11 +569,18 @@ nvc0_program_dump(struct nvc0_program *prog)
 
 bool
 nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
+   struct disk_cache *disk_shader_cache,
struct pipe_debug_callback *debug)
 {
+   struct blob blob;
struct nv50_ir_prog_info *info;
struct nv50_ir_prog_info_out info_out = {};
-   int ret;
+
+
+
+   int ret = 0;
+   cache_key key;
+   bool shader_found = false;
 
info = CALLOC_STRUCT(nv50_ir_prog_info);
if (!info)
@@ -632,11 +640,44 @@ nvc0_program_translate(struct nvc0_program *prog, 
uint16_t chipset,
info->bin.smemSize = prog->cp.smem_size;
info->io.genUserClip = prog->vp.num_ucps;
 
-   ret = nv50_ir_generate_code(info, &info_out);
-   if (ret) {
-  NOUVEAU_ERR("shader translation failed: %i\n", ret);
-  goto out;
+   blob_init(&blob);
+
+   if (disk_shader_cache) {
+  void *cached_data = NULL;
+  size_t cached_size;
+
+      nv50_ir_prog_info_serialize(&blob, info);
+      disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key);
+      cached_data = disk_cache_get(disk_shader_cache, key, &cached_size);
+
+  if (cached_data && cached_size >= blob.size) { // blob.size is the size 
of serialized "info"
+ if (memcmp(cached_data, blob.data, blob.size) == 0) {
+shader_found = true;
+/* Blob contains only "info". In disk cache, "info_out" comes 
right after it */
+size_t offset = blob.size;
+            if (!nv50_ir_prog_info_out_deserialize(cached_data, cached_size, 
offset, &info_out)) {
+   NOUVEAU_ERR("shader deserialization failed:\n");
+   goto out;
+}
+ }
+  }
+  free(cached_data);
+   }
+   if (!shader_found) {
+      ret = nv50_ir_generate_code(info, &info_out);
+  if (ret) {
+ NOUVEAU_ERR("shader translation failed: %i\n", ret);
+ goto out;
+  }
+  if (disk_shader_cache) {
+         if (!nv50_ir_prog_info_out_serialize(&blob, &info_out)) {
+NOUVEAU_ERR("shader serialization failed:\n");
+goto out;
+ }
+ disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL);
+  }
}
+   blob_finish(&blob);
 
prog->code = info_out.bin.code;
prog->code_size = info_out.bin.codeSize;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 774c5648113..4327a89454b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -54,7 +54,8 @@ nvc0_program_vali

[Mesa-dev] [PATCH v3 4/8] nv50/ir: Add prog_info_out print

2020-03-27 Thread Mark Menzynski
Adds a function for printing the nv50_ir_prog_info_out structure
in a JSON-like format, which can be used for debugging.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |   3 +
 .../drivers/nouveau/codegen/nv50_ir_print.cpp | 154 ++
 2 files changed, 157 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 591aa8f57e8..1bfaa8483ca 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -275,6 +275,9 @@ namespace nv50_ir
 }
 #endif
 
+extern void
+nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *);
+
 /* Serialize a nv50_ir_prog_info_out structure and save it into blob */
 extern bool MUST_CHECK
 nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 5dcbf3c3e0c..2c13bef5e1a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -22,6 +22,7 @@
 
 #include "codegen/nv50_ir.h"
 #include "codegen/nv50_ir_target.h"
+#include "codegen/nv50_ir_driver.h"
 
 #include 
 
@@ -852,3 +853,156 @@ Function::printLiveIntervals() const
 }
 
 } // namespace nv50_ir
+
+extern void
+nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *info_out)
+{
+   int i;
+
+   INFO("{\n");
+   INFO("   \"target\":\"%d\",\n", info_out->target);
+   INFO("   \"type\":\"%d\",\n", info_out->type);
+
+   // Bin
+   INFO("   \"bin\":{\n");
+   INFO("  \"maxGPR\":\"%d\",\n", info_out->bin.maxGPR);
+   INFO("  \"tlsSpace\":\"%d\",\n", info_out->bin.tlsSpace);
+   INFO("  \"smemSize\":\"%d\",\n", info_out->bin.smemSize);
+   INFO("  \"codeSize\":\"%d\",\n", info_out->bin.codeSize);
+   INFO("  \"instructions\":\"%d\",\n", info_out->bin.instructions);
+
+   // RelocInfo
+   INFO("  \"RelocInfo\":");
+   if (!info_out->bin.relocData) {
+  INFO("\"NULL\",\n");
+   } else {
+  nv50_ir::RelocInfo *reloc = (nv50_ir::RelocInfo 
*)info_out->bin.relocData;
+  INFO("{\n");
+  INFO(" \"codePos\":\"%d\",\n", reloc->codePos);
+  INFO(" \"libPos\":\"%d\",\n", reloc->libPos);
+  INFO(" \"dataPos\":\"%d\",\n", reloc->dataPos);
+  INFO(" \"count\":\"%d\",\n", reloc->count);
+  INFO(" \"RelocEntry\":[\n");
+  for (unsigned int i = 0; i < reloc->count; i++) {
+ INFO("
{\"data\":\"%d\",\t\"mask\":\"%d\",\t\"offset\":\"%d\",\t\"bitPos\":\"%d\",\t\"type\":\"%d\"}",
+   reloc->entry[i].data, reloc->entry[i].mask, 
reloc->entry[i].offset, reloc->entry[i].bitPos, reloc->entry[i].type
+   );
+  }
+  INFO("\n");
+  INFO(" ]\n");
+  INFO("  },\n");
+   }
+
+   // FixupInfo
+   INFO("  \"FixupInfo\":");
+   if (!info_out->bin.fixupData) {
+  INFO("\"NULL\"\n");
+   } else {
+  nv50_ir::FixupInfo *fixup = (nv50_ir::FixupInfo 
*)info_out->bin.fixupData;
+  INFO("{\n");
+  INFO(" \"count\":\"%d\"\n", fixup->count);
+  INFO(" \"FixupEntry\":[\n");
+  for (unsigned int i = 0; i < fixup->count; i++) {
+ INFO("
{\"apply\":\"%p\",\t\"ipa\":\"%d\",\t\"reg\":\"%d\",\t\"loc\":\"%d\"}\n",
+   fixup->entry[i].apply, fixup->entry[i].ipa, 
fixup->entry[i].reg, fixup->entry[i].loc);
+  }
+  INFO("\n");
+  INFO(" ]\n");
+  INFO("  }\n");
+
+  INFO("   },\n");
+   }
+
+   if (info_out->numSysVals) {
+  INFO("   \"sv\":[\n");
+  for (i = 0; i < info_out->numSysVals; i++) {
+ if (&(info_out->sv[i])) {
+INFO("  {\"id\":\"%d\", \"sn\":\"%d\", \"si\":\"%d\"}\n",
+   info_out->sv[i].id, info_out->sv[i].sn, inf

[Mesa-dev] [PATCH v3 1/8] nv50/ir: remove symbol table support for compute shaders

2020-03-27 Thread Mark Menzynski
From: Karol Herbst 

The initial plan was to use this for OpenCL kernels, but back then the
plan was to convert from LLVM to TGSI. As it turns out, we didn't go
that way.

Right now for OpenCL we don't require support for multiple entry points
inside the same binary, and if we want to support it later, we can add
this back.

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h |  2 --
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |  2 --
 .../nouveau/codegen/nv50_ir_target.cpp| 23 ---
 .../drivers/nouveau/nv50/nv50_compute.c   | 17 +-
 .../drivers/nouveau/nv50/nv50_program.c   | 10 
 .../drivers/nouveau/nv50/nv50_program.h   |  2 --
 .../drivers/nouveau/nvc0/nvc0_compute.c   |  2 +-
 .../drivers/nouveau/nvc0/nvc0_context.h   |  2 --
 .../drivers/nouveau/nvc0/nvc0_program.c   | 21 -
 .../drivers/nouveau/nvc0/nvc0_program.h   |  2 --
 .../drivers/nouveau/nvc0/nve4_compute.c   |  4 ++--
 11 files changed, 4 insertions(+), 83 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 296b79f5d49..d2200fc4ea9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -1297,8 +1297,6 @@ public:
const Target *getTarget() const { return target; }
 
 private:
-   void emitSymbolTable(struct nv50_ir_prog_info *);
-
Type progType;
Target *target;
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 322bdd02557..55cc4c609f0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -97,8 +97,6 @@ struct nv50_ir_prog_info
   const void *source;
   void *relocData;
   void *fixupData;
-  struct nv50_ir_prog_symbol *syms;
-  uint16_t numSyms;
} bin;
 
struct nv50_ir_varying sv[PIPE_MAX_SHADER_INPUTS];
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 5c6d0570ae2..0af79e9d50a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -343,27 +343,6 @@ CodeEmitter::prepareEmission(BasicBlock *bb)
func->binSize += bb->binSize;
 }
 
-void
-Program::emitSymbolTable(struct nv50_ir_prog_info *info)
-{
-   unsigned int n = 0, nMax = allFuncs.getSize();
-
-   info->bin.syms =
-  (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
-
-   for (ArrayList::Iterator fi = allFuncs.iterator();
-!fi.end();
-fi.next(), ++n) {
-  Function *f = (Function *)fi.get();
-  assert(n < nMax);
-
-  info->bin.syms[n].label = f->getLabel();
-  info->bin.syms[n].offset = f->binPos;
-   }
-
-   info->bin.numSyms = n;
-}
-
 bool
 Program::emitBinary(struct nv50_ir_prog_info *info)
 {
@@ -403,8 +382,6 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
info->bin.relocData = emit->getRelocInfo();
info->bin.fixupData = emit->getFixupInfo();
 
-   emitSymbolTable(info);
-
// the nvc0 driver will print the binary iself together with the header
if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
   emit->printBinary();
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c 
b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
index d781f6fd7d4..1a78a371405 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -225,21 +225,6 @@ nv50_compute_upload_input(struct nv50_context *nv50, const 
uint32_t *input)
}
 }
 
-static uint32_t
-nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label)
-{
-   struct nv50_program *prog = nv50->compprog;
-   const struct nv50_ir_prog_symbol *syms =
-  (const struct nv50_ir_prog_symbol *)prog->cp.syms;
-   unsigned i;
-
-   for (i = 0; i < prog->cp.num_syms; ++i) {
-  if (syms[i].label == label)
- return prog->code_base + syms[i].offset;
-   }
-   return prog->code_base; /* no symbols or symbol not found */
-}
-
 void
 nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
 {
@@ -258,7 +243,7 @@ nv50_launch_grid(struct pipe_context *pipe, const struct 
pipe_grid_info *info)
nv50_compute_upload_input(nv50, info->input);
 
BEGIN_NV04(push, NV50_CP(CP_START_ID), 1);
-   PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc));
+   PUSH_DATA (push, cp->code_base);
 
BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1);
PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c 
b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index c9d01e8cee7..58c0463f1a2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ 

[Mesa-dev] [RFC PATCH v2 6/6] nv50: Add shader disk caching

2020-03-19 Thread Mark Menzynski
Adds shader disk caching for nv50 so that shaders do not have to be compiled
every time. Shaders are saved into disk_shader_cache from the nv50_screen
structure.

It serializes the input nv50_ir_prog_info to compute the hash key and
also to do a byte compare between the original nv50_ir_prog_info and the one
saved in the cache. If the keys match and the byte compare also reports that
they are equal, the shaders are the same, and the compiled
nv50_ir_prog_info_out from the cache can be used instead of compiling the
input info.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/nv50/nv50_program.c   | 276 +++---
 .../drivers/nouveau/nv50/nv50_program.h   |   2 +
 .../drivers/nouveau/nv50/nv50_shader_state.c  |   4 +-
 src/gallium/drivers/nouveau/nv50/nv50_state.c |   1 +
 4 files changed, 47 insertions(+), 236 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c 
b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index b5e36cf488d..156ac286a7f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -37,108 +37,6 @@ bitcount4(const uint32_t val)
return cnt[val & 0xf];
 }
 
-static int
-nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out *info)
-{
-   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
-   unsigned i, n, c;
-
-   n = 0;
-   for (i = 0; i < info->numInputs; ++i) {
-  prog->in[i].id = i;
-  prog->in[i].sn = info->in[i].sn;
-  prog->in[i].si = info->in[i].si;
-  prog->in[i].hw = n;
-  prog->in[i].mask = info->in[i].mask;
-
-  prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
-
-  for (c = 0; c < 4; ++c)
- if (info->in[i].mask & (1 << c))
-info->in[i].slot[c] = n++;
-
-  if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
- prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
-   }
-   prog->in_nr = info->numInputs;
-
-   for (i = 0; i < info->numSysVals; ++i) {
-  switch (info->sv[i].sn) {
-  case TGSI_SEMANTIC_INSTANCEID:
- prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
- continue;
-  case TGSI_SEMANTIC_VERTEXID:
- prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
- prog->vp.attrs[2] |= 
NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
- continue;
-  default:
- break;
-  }
-   }
-
-   /*
-* Corner case: VP has no inputs, but we will still need to submit data to
-* draw it. HW will shout at us and won't draw anything if we don't enable
-* any input, so let's just pretend it's the first one.
-*/
-   if (prog->vp.attrs[0] == 0 &&
-   prog->vp.attrs[1] == 0 &&
-   prog->vp.attrs[2] == 0)
-  prog->vp.attrs[0] |= 0xf;
-
-   /* VertexID before InstanceID */
-   if (info->io.vertexId < info->numSysVals)
-  info->sv[info->io.vertexId].slot[0] = n++;
-   if (info->io.instanceId < info->numSysVals)
-  info->sv[info->io.instanceId].slot[0] = n++;
-
-   n = 0;
-   for (i = 0; i < info->numOutputs; ++i) {
-  switch (info->out[i].sn) {
-  case TGSI_SEMANTIC_PSIZE:
- prog->vp.psiz = i;
- break;
-  case TGSI_SEMANTIC_CLIPDIST:
- prog->vp.clpd[info->out[i].si] = n;
- break;
-  case TGSI_SEMANTIC_EDGEFLAG:
- prog->vp.edgeflag = i;
- break;
-  case TGSI_SEMANTIC_BCOLOR:
- prog->vp.bfc[info->out[i].si] = i;
- break;
-  case TGSI_SEMANTIC_LAYER:
- prog->gp.has_layer = true;
- prog->gp.layerid = n;
- break;
-  case TGSI_SEMANTIC_VIEWPORT_INDEX:
- prog->gp.has_viewport = true;
- prog->gp.viewportid = n;
- break;
-  default:
- break;
-  }
-  prog->out[i].id = i;
-  prog->out[i].sn = info->out[i].sn;
-  prog->out[i].si = info->out[i].si;
-  prog->out[i].hw = n;
-  prog->out[i].mask = info->out[i].mask;
-
-  for (c = 0; c < 4; ++c)
- if (info->out[i].mask & (1 << c))
-info->out[i].slot[c] = n++;
-   }
-   prog->out_nr = info->numOutputs;
-   prog->max_out = n;
-   if (!prog->max_out)
-  prog->max_out = 1;
-
-   if (prog->vp.psiz < info->numOutputs)
-  prog->vp.psiz = prog->out[prog->vp.psiz].hw;
-
-   return 0;
-}
-
 static int
 nv50_vertprog_assign_slots_info(struct nv50_ir_prog_info_out *info)
 {
@@ -263,115 +161,6 @@ nv50_vertprog_assign_slots_prog(struct 
nv50_ir_prog_info_out *info)
return 0;
 }
 
-static int
-nv50_fragprog_assign_slots(struct nv50_ir_prog_info_out *info)
-{
-   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
-   unsigned i, n, m, c;
-   unsigned nvary;
-   unsi

[Mesa-dev] [RFC PATCH v2 1/6] nv50/ir: add nv50_ir_prog_info_out

2020-03-19 Thread Mark Menzynski
From: Karol Herbst 

Split out the output-relevant fields from the nv50_ir_prog_info struct
in order to have a cleaner separation between the input and output of
the compilation.

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir.cpp   |  49 ++--
 src/gallium/drivers/nouveau/codegen/nv50_ir.h |   9 +-
 .../drivers/nouveau/codegen/nv50_ir_driver.h  | 117 +---
 .../nouveau/codegen/nv50_ir_from_common.cpp   |  14 +-
 .../nouveau/codegen/nv50_ir_from_common.h |   3 +-
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 204 +++---
 .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 256 +-
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp |   6 +-
 .../nouveau/codegen/nv50_ir_target.cpp|   2 +-
 .../drivers/nouveau/codegen/nv50_ir_target.h  |   5 +-
 .../nouveau/codegen/nv50_ir_target_nv50.cpp   |  17 +-
 .../nouveau/codegen/nv50_ir_target_nv50.h |   3 +-
 .../drivers/nouveau/nouveau_compiler.c|   9 +-
 .../drivers/nouveau/nv50/nv50_program.c   |  62 +++--
 .../drivers/nouveau/nvc0/nvc0_program.c   |  87 +++---
 15 files changed, 449 insertions(+), 394 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index c65853578f6..c2c5956874a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1241,15 +1241,18 @@ void Program::releaseValue(Value *value)
 extern "C" {
 
 static void
-nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
+nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,
+   struct nv50_ir_prog_info_out *info_out)
 {
+   info_out->target = info->target;
+   info_out->type = info->type;
if (info->type == PIPE_SHADER_TESS_CTRL || info->type == 
PIPE_SHADER_TESS_EVAL) {
-  info->prop.tp.domain = PIPE_PRIM_MAX;
-  info->prop.tp.outputPrim = PIPE_PRIM_MAX;
+  info_out->prop.tp.domain = PIPE_PRIM_MAX;
+  info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;
}
if (info->type == PIPE_SHADER_GEOMETRY) {
-  info->prop.gp.instanceCount = 1;
-  info->prop.gp.maxVertices = 1;
+  info_out->prop.gp.instanceCount = 1;
+  info_out->prop.gp.maxVertices = 1;
}
if (info->type == PIPE_SHADER_COMPUTE) {
   info->prop.cp.numThreads[0] =
@@ -1257,23 +1260,26 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
   info->prop.cp.numThreads[2] = 1;
}
info->io.pointSize = 0xff;
-   info->io.instanceId = 0xff;
-   info->io.vertexId = 0xff;
-   info->io.edgeFlagIn = 0xff;
-   info->io.edgeFlagOut = 0xff;
-   info->io.fragDepth = 0xff;
-   info->io.sampleMask = 0xff;
+   info_out->bin.smemSize = info->bin.smemSize;
+   info_out->io.genUserClip = info->io.genUserClip;
+   info_out->io.instanceId = 0xff;
+   info_out->io.vertexId = 0xff;
+   info_out->io.edgeFlagIn = 0xff;
+   info_out->io.edgeFlagOut = 0xff;
+   info_out->io.fragDepth = 0xff;
+   info_out->io.sampleMask = 0xff;
info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
 }
 
 int
-nv50_ir_generate_code(struct nv50_ir_prog_info *info)
+nv50_ir_generate_code(struct nv50_ir_prog_info *info,
+  struct nv50_ir_prog_info_out *info_out)
 {
int ret = 0;
 
nv50_ir::Program::Type type;
 
-   nv50_ir_init_prog_info(info);
+   nv50_ir_init_prog_info(info, info_out);
 
 #define PROG_TYPE_CASE(a, b)  \
case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
@@ -1301,15 +1307,16 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
   return -1;
}
prog->driver = info;
+   prog->driver_out = info_out;
prog->dbgFlags = info->dbgFlags;
prog->optLevel = info->optLevel;
 
switch (info->bin.sourceRep) {
case PIPE_SHADER_IR_NIR:
-  ret = prog->makeFromNIR(info) ? 0 : -2;
+  ret = prog->makeFromNIR(info, info_out) ? 0 : -2;
   break;
case PIPE_SHADER_IR_TGSI:
-  ret = prog->makeFromTGSI(info) ? 0 : -2;
+  ret = prog->makeFromTGSI(info, info_out) ? 0 : -2;
   break;
default:
   ret = -1;
@@ -1320,7 +1327,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
   prog->print();
 
-   targ->parseDriverInfo(info);
+   targ->parseDriverInfo(info, info_out);
prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
 
prog->convertToSSA();
@@ -1342,7 +1349,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
 
prog->optimizePostRA(info->optLevel);
 
-   if (!prog->emitBinary(info)) {
+   if (!prog->emitBinary(info_out)) {
   ret = -5;
   goto out;
}
@@ -1350,10 +1357,10 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
 out:
INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
 
-   info->bin.maxGPR = prog->maxGPR;
-   info->bin.code = prog->code;
-   info->bin.codeSize = prog->binSize;
-   info->bin.tlsSpace = 

[Mesa-dev] [RFC PATCH v2 4/6] nv50/ir: Add nv50_ir_prog_info serialize

2020-03-19 Thread Mark Menzynski
Adds a function for serializing a nv50_ir_prog_info structure, which is
needed for shader caching.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |  4 +
 .../nouveau/codegen/nv50_ir_serialize.cpp | 81 +++
 .../drivers/nouveau/nvc0/nvc0_context.h   |  1 +
 .../drivers/nouveau/nvc0/nvc0_program.c   | 43 --
 .../drivers/nouveau/nvc0/nvc0_shader_state.c  |  3 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c |  2 +
 6 files changed, 128 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 10ae5cbe420..3728470ab45 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -278,6 +278,10 @@ namespace nv50_ir
 extern void
 nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *);
 
+/* Serialize a nv50_ir_prog_info structure and save it into blob */
+extern bool
+nv50_ir_prog_info_serialize(struct blob *, struct nv50_ir_prog_info *);
+
 /* Serialize a nv50_ir_prog_info_out structure and save it into blob */
 extern bool
 nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
index 5671483bd4e..b640cb67503 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
@@ -17,6 +17,87 @@ enum FixupApplyFunc {
FLIP_GM107 = 7
 };
 
+extern bool
+nv50_ir_prog_info_serialize(struct blob *blob, struct nv50_ir_prog_info *info)
+{
+   blob_write_uint16(blob, info->target);
+   blob_write_uint8(blob, info->type);
+   blob_write_uint8(blob, info->optLevel);
+   blob_write_uint8(blob, info->dbgFlags);
+   blob_write_uint8(blob, info->omitLineNum);
+   blob_write_uint32(blob, info->bin.smemSize);
+   blob_write_uint16(blob, info->bin.maxOutput);
+   blob_write_uint8(blob, info->bin.sourceRep);
+
+   switch(info->bin.sourceRep) {
+  case PIPE_SHADER_IR_TGSI: {
+ struct tgsi_token *tokens = (struct tgsi_token *)info->bin.source;
+ unsigned int num_tokens = tgsi_num_tokens(tokens);
+
+ blob_write_uint32(blob, num_tokens);
+ blob_write_bytes(blob, tokens, num_tokens * sizeof(struct 
tgsi_token));
+ break;
+  }
+  case PIPE_SHADER_IR_NIR: {
+ struct nir_shader *nir = (struct nir_shader *)info->bin.source;
+ nir_serialize(blob, nir, false);
+ break;
+  }
+  default:
+ assert(!"unhandled info->bin.sourceRep");
+ return false;
+   }
+
+   blob_write_uint16(blob, info->immd.bufSize);
+   blob_write_bytes(blob, info->immd.buf, info->immd.bufSize * 
sizeof(*info->immd.buf));
+   blob_write_uint16(blob, info->immd.count);
+   blob_write_bytes(blob, info->immd.data, info->immd.count * 
sizeof(*info->immd.data));
+   blob_write_bytes(blob, info->immd.type, info->immd.count * 16); // for each 
vec4 (128 bit)
+
+   switch (info->type) {
+  case PIPE_SHADER_VERTEX:
+ blob_write_bytes(blob, info->prop.vp.inputMask,
+  4 * sizeof(*info->prop.vp.inputMask)); /* array of 
size 4 */
+ break;
+  case PIPE_SHADER_TESS_CTRL:
+ blob_write_uint32(blob, info->prop.cp.inputOffset);
+ blob_write_uint32(blob, info->prop.cp.sharedOffset);
+ blob_write_uint32(blob, info->prop.cp.gridInfoBase);
+ blob_write_bytes(blob, info->prop.cp.numThreads,
+  3 * sizeof(*info->prop.cp.numThreads)); /* array of 
size 3 */
+  case PIPE_SHADER_GEOMETRY:
+ blob_write_uint8(blob, info->prop.gp.inputPrim);
+ break;
+  case PIPE_SHADER_FRAGMENT:
+ blob_write_uint8(blob, info->prop.fp.persampleInvocation);
+ break;
+  default:
+ break;
+   }
+
+   blob_write_uint8(blob, info->io.auxCBSlot);
+   blob_write_uint16(blob, info->io.ucpBase);
+   blob_write_uint16(blob, info->io.drawInfoBase);
+   blob_write_uint16(blob, info->io.alphaRefBase);
+   blob_write_uint8(blob, info->io.pointSize);
+   blob_write_uint8(blob, info->io.viewportId);
+   blob_write_bytes(blob, info->io.backFaceColor, 2 * 
sizeof(*info->io.backFaceColor));
+   blob_write_uint8(blob, info->io.mul_zero_wins);
+   blob_write_uint8(blob, info->io.nv50styleSurfaces);
+   blob_write_uint16(blob, info->io.texBindBase);
+   blob_write_uint16(blob, info->io.fbtexBindBase);
+   blob_write_uint16(blob, info->io.suInfoBase);
+   blob_write_uint16(blob, info->io.bindlessBase);
+   blob_write_uint16(blob, info->io.bufInfoBase);
+   blob_write_uint16(blob, info->io.sampleInfoBase);
+   blob_write_uint8(blob, info->io.msInfoCBSlot);
+   blob_wr

[Mesa-dev] [RFC PATCH v2 2/6] nv50/ir: Add nv50_ir_prog_info_out serialize and deserialize

2020-03-19 Thread Mark Menzynski
Adds functions for serializing and deserializing the
nv50_ir_prog_info_out structure, which are needed for shader caching.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |  44 
 .../nouveau/codegen/nv50_ir_emit_gk110.cpp|  14 +-
 .../nouveau/codegen/nv50_ir_emit_gm107.cpp|  14 +-
 .../nouveau/codegen/nv50_ir_emit_nv50.cpp |   6 +-
 .../nouveau/codegen/nv50_ir_emit_nvc0.cpp |  14 +-
 .../nouveau/codegen/nv50_ir_serialize.cpp | 196 ++
 src/gallium/drivers/nouveau/meson.build   |   1 +
 7 files changed, 265 insertions(+), 24 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index dab1ce030cb..eea32133ccf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -25,6 +25,7 @@
 
 #include "pipe/p_shader_tokens.h"
 
+#include "util/blob.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
@@ -242,6 +243,49 @@ nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
 extern void nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size);
 
+
+#ifdef __cplusplus
+namespace nv50_ir
+{
+   class FixupEntry;
+   class FixupData;
+
+   void
+   gk110_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+ const nv50_ir::FixupData& data);
+   void
+   gm107_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+ const nv50_ir::FixupData& data);
+   void
+   nv50_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+const nv50_ir::FixupData& data);
+   void
+   nvc0_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+const nv50_ir::FixupData& data);
+   void
+   gk110_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
+  const nv50_ir::FixupData& data);
+   void
+   gm107_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
+  const nv50_ir::FixupData& data);
+   void
+   nvc0_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
+ const nv50_ir::FixupData& data);
+
+}
+#endif
+
+/* Serialize a nv50_ir_prog_info_out structure and save it into blob */
+extern bool
+nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *);
+
+/* Deserialize from data and save into a nv50_ir_prog_info_out structure
+ * using a pointer. Size is a total size of the serialized data.
+ * Offset points to where info_out in data is located. */
+extern bool
+nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset,
+  struct nv50_ir_prog_info_out *);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 2118c3153f7..e651d7fdcb0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1209,8 +1209,8 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
}
 }
 
-static void
-selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+void
+gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
 {
int loc = entry->loc;
if (data.force_persample_interp)
@@ -1227,7 +1227,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i)
   code[1] |= 1 << 13;
 
if (i->subOp == 1) {
-  addInterp(0, 0, selpFlip);
+  addInterp(0, 0, gk110_selpFlip);
}
 }
 
@@ -2042,8 +2042,8 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i)
code[1] |= (i->ipa & 0xc) << (19 - 2);
 }
 
-static void
-interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+void
+gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const 
FixupData& data)
 {
int ipa = entry->ipa;
int reg = entry->reg;
@@ -2078,10 +2078,10 @@ CodeEmitterGK110::emitINTERP(const Instruction *i)
 
if (i->op == OP_PINTERP) {
   srcId(i->src(1), 23);
-  addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
+  addInterp(i->ipa, SDATA(i->src(1)).id, gk110_interpApply);
} else {
   code[0] |= 0xff << 23;
-  addInterp(i->ipa, 0xff, interpApply);
+  addInterp(i->ipa, 0xff, gk110_interpApply);
}
 
srcId(i->src(0).getIndirect(0), 10);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index e244bd0d610..4970f14cb33 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_e

[Mesa-dev] [RFC PATCH v2 3/6] nv50/ir: Add prog_info_out print

2020-03-19 Thread Mark Menzynski
Adds a function for printing the nv50_ir_prog_info_out structure
in a JSON-like format, which can be used for debugging.

Signed-off-by: Mark Menzynski 
---
 .../nouveau/codegen/.nv50_ir_from_nir.cpp.swp | Bin 0 -> 16384 bytes
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |   3 +
 .../drivers/nouveau/codegen/nv50_ir_print.cpp | 154 ++
 3 files changed, 157 insertions(+)
 create mode 100644 
src/gallium/drivers/nouveau/codegen/.nv50_ir_from_nir.cpp.swp

diff --git a/src/gallium/drivers/nouveau/codegen/.nv50_ir_from_nir.cpp.swp 
b/src/gallium/drivers/nouveau/codegen/.nv50_ir_from_nir.cpp.swp
new file mode 100644
index 
..c405065a5df6c33ee4f0439c30c474d446b87730
GIT binary patch
literal 16384
zcmeHOYmgjO6>bBHf+(U`l!A4$5!hXaojg#f-DJt^>}FK^R)263;
z=*Mn0F;N7|@&{n4WtEm?DVBv`4+)c+9$@lpCm@dx4q4T2IBQPl6;dwU+8O|n1y
zW%X9R>`b3~(IOp`;=FU}piQPE0Tf@&~ns(*=bGKb~)t9u_-J)sTF87XZcp;I_
z_2c@M>2~}uni8LEziT?)CLK;B4D^Ft@{1BZwCE)UIo7+CqX>AFtb(*q#)B~L
z`SBVzb`o9(Z9`*cGivFUa->aT)r~#63{-x-L75yS5P(uHwqTi|LL5lu@qTi?J&!*`A
zQ1n32-+zJpknn$A(Qi}q(<%Dj6@5+7YZs38|C^$}OVPidqCcnT#})mWmyY&7t>{VJ
zSw(kL|I1!B+W)Vq@i$WRzbN`URsUb4=+7v6qW`XoM)^OT;(uR?{%1u$r1+mo
z(VtRuuIQzgkMcjI=*hf#Fh&1UivF5cjQ0PdqJK#7Ka!$9sp$Rreer1jKPY-4Upqzr
zy`m@c=iwCncd7AbFB#?kTSdQB$#*hEe?rmM6@AAmNBgVksO?wuPp0VVarP^-KxTo=
z0+|Ie3uG3^ERb0svp{Bn%mQb%fN5(QrE1@$L>TY?ll`JqnYTz_-s*}Ly
zfxCgbfDTXvbRY*@16*S_cZVu;Bnw2@J--Tz{i320VZ$@a0u8B+yu-37XsUW
zN0Dbf0z3?S4)`qa31A*50yh9}0k#1@grV*N3c!WH1;EdcN8bxLz}z#D-dBf0w^Fb6!3y2a0dPXl#e1~>)Zd=DTWeTV#{e%M)l
zn!fKcC#>UEh8eJZtYx|(pP13mJa)Wj$}PZ64{K?F8J4#!1KWILdU|VFbddY
zPPDxdO~b)QE5}w%H|A{e+8nXV=b0IXahKc7k2*Y9cS5e7_s4fIjAceVWKqYA@E>QU
zZ3jFI9j^^BSt#Vpu{CbpFiIj;XO$Q?Muwyu2qqPUWKk#Z<90`kD5dV1f!XB|52lom
zdP`7Se(c$7O=#9JL+1GrGnwVX5~de1bc=FBY+$>P1w4uakK2j5WVP6e9CTvie5yUg
zB7S6oIUe%^n+L4r2jD0!varY(%wach^+TSBW0@g`wL>35I=B|Y);Y$xBZixpgUd6v
z13OANL$GBbhj-AJwH%k*c;qQ*q7Lkee_dS4aW|L=A$s8G95qQm8ovoIG+m7Ev+%If
zgPd|QpqmHXTYO!yqRBy~#_>N@OS9PfyJRmx958tR`OdkPJbRvOtI1$$5*rah$6exSL?is8SYpkqxXGi_`P!oVDm6yld{2_iLGM}*S`)=
zj<9OdqomQMYLJUzg$(
zO{>F~u-f`exEFU>+ciwf!p>vd5u2{la(ICD;BiHo9hSZ;TDGjWwQQ
zL;B6KiW3M`m&~5nRL|Er*}_CmA#{0(cdM-y)^E5a4zvi*aON!2lRaUpk|+-dLFRcY
z-7Spl>1k}kxR|$BHl3VJCg_aT!?ir@KhtC{;ejCb#+Chwa`N(Ed^zyj*kx^+%&
zk;^5yz1KqnOvjclny4#%{fCAh%SqLXo33L`YsWOp#Tmo;Bd}>EX@f^azuU#$ryVEJ
zTUg_uAp(if3w<41qHm
zkEAMtzR)B;A`-PBkq1rE4a3!>m}dJ&%my-_LE>#`ABF4P)&
zaiv_SvE`N8ausqHD)X#Tt>~478Z1y+DpeXd_S!osDSbecjE)y?(X}PSI=5wGH
z%PaF_usN8jQf)w#CB0FahcZHP7`9fZG;|VUfi0D4#YKEgjFRY=xPa+(wp^>O>hs_*
zR)CA@W3U{oO`cG^4iP6(^JcvQ5lZh^u903sOJayy&^WewxEd{DyclzV=wRJwK2E
zrizj3ek)ox1BxGEX9}#F{u-Tmy^X+WcOnX89c)S_qFWIXVgYm=1m1`N6)Eh{aEJ!g
zzF^}6xDmsII|Y(0#JWz{LEMLc(MHVZBTS8(@5Wg{Pbgq(`7YKP#q`)kZAS=-vjX~f
zc3uRlLwd;=FBu^xCG?q)4*@rBe^VR{*=3=L%-yDHM
zP96_oC(&9Yf9(oC0P=mvyh<1r>A_-eZph-s%@)L9w@qA9%hEz+)E`sSu
z1k=Qfq>yndOcg4p2%T_p`4(HmqhL;P+%N5wRmtps~l2!N(?qXv+g9fgLsj-rQg~
zQ$29Ou@!JmV_6-Fl8~187-~U!#r|Z#f$(-#gCvVc!*tJ@((;oU
z<^Q)Mw|x(CVaosatK9OZ$oGE^KPR{&2U@BbEX0$2v#44g*p{}}Kq;48qL
zzzlE+a53;WY5|V|KLWlDd;~ZFYyd|9su#Qs7z3^bt^%G$P2dN>L%?T%yMZpy0ouSp
zfNBU0U>~p-7zeHc#(<|#Bls%tW#A}q0HFH86@Ugj0i7QKz5r0Y;A6l?0kXw0Ap6NI
zkXazJKxTo=0+|Ie3;eGwaGdJrgX~j}Qg|Ai}3w
zCDLhC144^Ifu>It0*E>g#n3I}42k4Ys^J0=K7+BW<%*io42nS*=*%Bg
zusNf`B#TM?F*Q}2nx5{Ohxr2jinOBF#?RK7O$w1U(tNmxqDEmTrU(bd9=XsSIC9j1
zcq;(UXb@Udr=;+zMkQX%Bt8%pUvuzhvE!}S6Pd?^Fg;lbZx1+XnnptDHxbsIAYmkD
zYK%f1s$R(KPl?gJ-Hfr`%WZQnLH#Qh=yCl^``E4Q*H!;Xoc=rOl
z8)5KQiZR~Jgq+;i=Smg`*E#Qz>QV$Ovjb+(G~1LLyYvEsl0QmyrSs{fjnEd1jO9YT
zZmdeX5id2HWkm{wd8Ax$sbsua`+#?mgrgXQ+WVRrIP{tYFLj6zi>A
zD-<+n{

literal 0
HcmV?d1

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index eea32133ccf..10ae5cbe420 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -275,6 +275,9 @@ namespace nv50_ir
 }
 #endif
 
+extern void
+nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *);
+
 /* Serialize a nv50_ir_prog_info_out structure and save it into blob */
 extern bool
 nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 5dcbf3c3e0c..2c13bef5e1a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -22,6 +22,7 @@
 
 #include "codegen/nv50_ir.h"
 #include "codegen/nv50_ir_target.h"
+#include "codegen/nv50_ir_driver.h"
 
 #include 
 
@@ -852,3 +853,156 @@ Function::printLiveIntervals() const
 }
 
 } // namespace nv50_ir
+
+extern void
+nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *info_out)
+{
+   int i;
+
+   INFO("{\n");
+   INFO("   \&q

[Mesa-dev] [RFC PATCH v2 5/6] nv50: Add separate functions for varying bits

2020-03-19 Thread Mark Menzynski
This separation will be needed for shader disk caching. The reason for it
is that when loading shaders from the cache, the data in the info structure
is already loaded. That means the varying bits for info are needed only when
compiling shaders and not when loading from the cache. The varying bits for
prog are needed in both cases.

Unfortunately, I don't know how most of the code works; I have separated
this manually, only by looking at the original code. That means that this
patch is experimental. Together with the following commit it works
(there seem to be no regressions at all in VK-GL-CTS
[openglcts/data/mustpass/gl/khronos_mustpass/4.6.1.x/gl33-master.txt]
and all benchmarks behaved normally). Unfortunately, I cannot test with
Piglit because of technical problems, so some more work might still be
needed.
I am mainly asking for help with the function names, with looking for bugs,
and with pointing out useless code. I will be glad for every
review.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/nv50/nv50_program.c   | 344 ++
 1 file changed, 344 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c 
b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 924120eecdf..b5e36cf488d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -139,6 +139,130 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out 
*info)
return 0;
 }
 
+static int
+nv50_vertprog_assign_slots_info(struct nv50_ir_prog_info_out *info)
+{
+   unsigned i, n, c;
+
+   n = 0;
+   for (i = 0; i < info->numInputs; ++i) {
+   for (c = 0; c < 4; ++c)
+ if (info->in[i].mask & (1 << c))
+info->in[i].slot[c] = n++;
+   }
+
+   /* VertexID before InstanceID */
+   if (info->io.vertexId < info->numSysVals)
+  info->sv[info->io.vertexId].slot[0] = n++;
+   if (info->io.instanceId < info->numSysVals)
+  info->sv[info->io.instanceId].slot[0] = n++;
+
+   n = 0;
+   for (i = 0; i < info->numOutputs; ++i) {
+  for (c = 0; c < 4; ++c)
+ if (info->out[i].mask & (1 << c))
+info->out[i].slot[c] = n++;
+   }
+
+   return 0;
+}
+
+static int
+nv50_vertprog_assign_slots_prog(struct nv50_ir_prog_info_out *info)
+{
+   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
+   unsigned i, n, c;
+
+   n = 0;
+   for (i = 0; i < info->numInputs; ++i) {
+  prog->in[i].id = i;
+  prog->in[i].sn = info->in[i].sn;
+  prog->in[i].si = info->in[i].si;
+  prog->in[i].hw = n;
+  prog->in[i].mask = info->in[i].mask;
+
+  prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
+
+  for (c = 0; c < 4; ++c)
+ if (info->in[i].mask & (1 << c))
+n++;
+
+  if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
+   }
+   prog->in_nr = info->numInputs;
+
+   for (i = 0; i < info->numSysVals; ++i) {
+  switch (info->sv[i].sn) {
+  case TGSI_SEMANTIC_INSTANCEID:
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
+ continue;
+  case TGSI_SEMANTIC_VERTEXID:
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
+ prog->vp.attrs[2] |= 
NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
+ continue;
+  default:
+ break;
+  }
+   }
+
+   /*
+* Corner case: VP has no inputs, but we will still need to submit data to
+* draw it. HW will shout at us and won't draw anything if we don't enable
+* any input, so let's just pretend it's the first one.
+*/
+   if (prog->vp.attrs[0] == 0 &&
+   prog->vp.attrs[1] == 0 &&
+   prog->vp.attrs[2] == 0)
+  prog->vp.attrs[0] |= 0xf;
+
+   n = 0;
+   for (i = 0; i < info->numOutputs; ++i) {
+  switch (info->out[i].sn) {
+  case TGSI_SEMANTIC_PSIZE:
+ prog->vp.psiz = i;
+ break;
+  case TGSI_SEMANTIC_CLIPDIST:
+ prog->vp.clpd[info->out[i].si] = n;
+ break;
+  case TGSI_SEMANTIC_EDGEFLAG:
+ prog->vp.edgeflag = i;
+ break;
+  case TGSI_SEMANTIC_BCOLOR:
+ prog->vp.bfc[info->out[i].si] = i;
+ break;
+  case TGSI_SEMANTIC_LAYER:
+ prog->gp.has_layer = true;
+ prog->gp.layerid = n;
+ break;
+  case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ prog->gp.has_viewport = true;
+ prog->gp.viewportid = n;
+ break;
+  default:
+ break;
+  }
+  prog->out[i].id = i;
+  prog->out[i].sn = info->out[i].sn;
+  prog->out[i].si = info->out[i].si;
+  prog->out[i].hw = n;
+  prog->out[i].mask = info->out[i].mask;
+
+  for (c = 0; c < 4; ++c)
+  

[Mesa-dev] [RFC PATCH 2/2] nv50: Add shader disk caching

2020-03-19 Thread Mark Menzynski
Adds shader disk caching for nv50 so that shaders do not have to be compiled
every time. Shaders are saved into disk_shader_cache from the nv50_screen
structure.

It can be disabled with MESA_GLSL_CACHE_DISABLE=1.

It serializes the input nv50_ir_prog_info to compute the hash key and
also to do a byte compare between the original nv50_ir_prog_info and the one
saved in the cache. If the keys match and the byte compare also reports that
they are equal, the shaders are the same, and the compiled
nv50_ir_prog_info_out from the cache can be used instead of compiling the
input info.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/nv50/nv50_program.c   | 55 ---
 .../drivers/nouveau/nv50/nv50_program.h   |  2 +
 .../drivers/nouveau/nv50/nv50_shader_state.c  |  4 +-
 src/gallium/drivers/nouveau/nv50/nv50_state.c |  1 +
 4 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c 
b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index bf63b20f613..0b85267f36f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -667,10 +667,21 @@ nv50_program_create_strmout_state(const struct 
nv50_ir_prog_info_out *info,
 
 bool
 nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
+   struct disk_cache *disk_shader_cache,
struct pipe_debug_callback *debug)
 {
+   struct blob blob;
struct nv50_ir_prog_info *info;
-   int i, ret;
+   struct nv50_ir_prog_info_out info_out = {};
+
+   void *cached_data = NULL;
+   size_t cached_size;
+   bool shader_found = false;
+
+   int i;
+   int ret = 0;
+   cache_key key;
+
const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;
 
info = CALLOC_STRUCT(nv50_ir_prog_info);
@@ -704,7 +715,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t 
chipset,
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;
 
-   info->assignSlots = nv50_program_assign_varying_slots;
+   info->assignSlots = nv50_program_assign_varying_slots_info;
 
prog->vp.bfc[0] = 0xff;
prog->vp.bfc[1] = 0xff;
@@ -726,16 +737,42 @@ nv50_program_translate(struct nv50_program *prog, 
uint16_t chipset,
info->optLevel = 3;
 #endif
 
-   struct nv50_ir_prog_info_out info_out = {};
/* these fields might be overwritten by the compiler */
-   info_out.bin.smemSize = prog->cp.smem_size;
-   info_out.io.genUserClip = prog->vp.clpd_nr;
+   info->bin.smemSize = prog->cp.smem_size;
+   info->io.genUserClip = prog->vp.clpd_nr;
+
+   blob_init(&blob);
+
+   if (disk_shader_cache) {
+  nv50_ir_prog_info_serialize(&blob, info);
+  disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key);
+  cached_data = disk_cache_get(disk_shader_cache, key, &cached_size);
+
+  if (cached_data && cached_size >= blob.size) { // blob.size is the size 
of serialized "info"
+ if (memcmp(cached_data, blob.data, blob.size) == 0) {
+shader_found = true;
+/* Blob contains only "info". In disk cache, "info_out" comes 
right after it */
+size_t offset = blob.size;
+nv50_ir_prog_info_out_deserialize(cached_data, cached_size, 
offset, &info_out);
+ }
+  }
+  free(cached_data);
+   }
info_out.driverPriv = prog;
-   ret = nv50_ir_generate_code(info, &info_out);
-   if (ret) {
-  NOUVEAU_ERR("shader translation failed: %i\n", ret);
-  goto out;
+
+   if (!shader_found) {
+  ret = nv50_ir_generate_code(info, &info_out);
+  if (ret) {
+ NOUVEAU_ERR("shader translation failed: %i\n", ret);
+ goto out;
+  }
+  if (disk_shader_cache) {
+ nv50_ir_prog_info_out_serialize(&blob, &info_out);
+ disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL);
+  }
}
+   blob_finish(&blob);
+   nv50_program_assign_varying_slots_prog(&info_out);
 
prog->code = info_out.bin.code;
prog->code_size = info_out.bin.codeSize;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h 
b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 1a89e0d5067..528e1d01fa1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -116,7 +116,9 @@ struct nv50_program {
struct nv50_stream_output_state *so;
 };
 
+struct disk_cache;
 bool nv50_program_translate(struct nv50_program *, uint16_t chipset,
+struct disk_cache *,
 struct pipe_debug_callback *);
 bool nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
 void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c 
b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index 2cbbdc0cc35..65891108464 100644
--- a/src/gallium/drivers

[Mesa-dev] [RFC PATCH 1/2] nv50: Add separate functions for varying bits

2020-03-19 Thread Mark Menzynski
This separation will be needed for shader disk caching. The reason is that
when shaders are loaded from the cache, the data in the info structure is
already loaded. That means the varying bits for info are needed only when
compiling shaders, not when loading from the cache. The varying bits for prog
are needed in both cases.

Unfortunately, I don't know how most of the code works; I have separated
this manually, only by looking at the original code. That means that this
patch is experimental. Together with the following commit it works
(there seem to be no regressions at all in VK-GL-CTS
[openglcts/data/mustpass/gl/khronos_mustpass/4.6.1.x/gl33-master.txt]
and all benchmarks behaved normally). Unfortunately, I cannot test with
Piglit because of technical problems, so some more work might still be
needed.

I am mainly asking for help with the function names,
finding bugs and pointing out useless code. I will be glad for every
review.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/nv50/nv50_program.c   | 344 ++
 1 file changed, 344 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c 
b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index a3f3054cbaa..bf63b20f613 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -139,6 +139,130 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out 
*info)
return 0;
 }
 
+static int
+nv50_vertprog_assign_slots_info(struct nv50_ir_prog_info_out *info)
+{
+   unsigned i, n, c;
+
+   n = 0;
+   for (i = 0; i < info->numInputs; ++i) {
+   for (c = 0; c < 4; ++c)
+ if (info->in[i].mask & (1 << c))
+info->in[i].slot[c] = n++;
+   }
+
+   /* VertexID before InstanceID */
+   if (info->io.vertexId < info->numSysVals)
+  info->sv[info->io.vertexId].slot[0] = n++;
+   if (info->io.instanceId < info->numSysVals)
+  info->sv[info->io.instanceId].slot[0] = n++;
+
+   n = 0;
+   for (i = 0; i < info->numOutputs; ++i) {
+  for (c = 0; c < 4; ++c)
+ if (info->out[i].mask & (1 << c))
+info->out[i].slot[c] = n++;
+   }
+
+   return 0;
+}
+
+static int
+nv50_vertprog_assign_slots_prog(struct nv50_ir_prog_info_out *info)
+{
+   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
+   unsigned i, n, c;
+
+   n = 0;
+   for (i = 0; i < info->numInputs; ++i) {
+  prog->in[i].id = i;
+  prog->in[i].sn = info->in[i].sn;
+  prog->in[i].si = info->in[i].si;
+  prog->in[i].hw = n;
+  prog->in[i].mask = info->in[i].mask;
+
+  prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
+
+  for (c = 0; c < 4; ++c)
+ if (info->in[i].mask & (1 << c))
+n++;
+
+  if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
+   }
+   prog->in_nr = info->numInputs;
+
+   for (i = 0; i < info->numSysVals; ++i) {
+  switch (info->sv[i].sn) {
+  case TGSI_SEMANTIC_INSTANCEID:
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
+ continue;
+  case TGSI_SEMANTIC_VERTEXID:
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
+ prog->vp.attrs[2] |= 
NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
+ continue;
+  default:
+ break;
+  }
+   }
+
+   /*
+* Corner case: VP has no inputs, but we will still need to submit data to
+* draw it. HW will shout at us and won't draw anything if we don't enable
+* any input, so let's just pretend it's the first one.
+*/
+   if (prog->vp.attrs[0] == 0 &&
+   prog->vp.attrs[1] == 0 &&
+   prog->vp.attrs[2] == 0)
+  prog->vp.attrs[0] |= 0xf;
+
+   n = 0;
+   for (i = 0; i < info->numOutputs; ++i) {
+  switch (info->out[i].sn) {
+  case TGSI_SEMANTIC_PSIZE:
+ prog->vp.psiz = i;
+ break;
+  case TGSI_SEMANTIC_CLIPDIST:
+ prog->vp.clpd[info->out[i].si] = n;
+ break;
+  case TGSI_SEMANTIC_EDGEFLAG:
+ prog->vp.edgeflag = i;
+ break;
+  case TGSI_SEMANTIC_BCOLOR:
+ prog->vp.bfc[info->out[i].si] = i;
+ break;
+  case TGSI_SEMANTIC_LAYER:
+ prog->gp.has_layer = true;
+ prog->gp.layerid = n;
+ break;
+  case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ prog->gp.has_viewport = true;
+ prog->gp.viewportid = n;
+ break;
+  default:
+ break;
+  }
+  prog->out[i].id = i;
+  prog->out[i].sn = info->out[i].sn;
+  prog->out[i].si = info->out[i].si;
+  prog->out[i].hw = n;
+  prog->out[i].mask = info->out[i].mask;
+
+  for (c = 0; c < 4;

[Mesa-dev] [PATCH v2 6/7] tgsi/util: Change boolean for bool

2020-02-21 Thread Mark Menzynski
I was getting errors with "boolean" when compiling. This patch changes
boolean to bool from <stdbool.h>.

Signed-off-by: Mark Menzynski 
---
 src/gallium/auxiliary/tgsi/tgsi_util.c | 2 +-
 src/gallium/auxiliary/tgsi/tgsi_util.h | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c 
b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 1e5582ba273..e1b604cff0e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -537,7 +537,7 @@ tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type 
tgsi_tex)
 }
 
 
-boolean
+bool
 tgsi_is_shadow_target(enum tgsi_texture_type target)
 {
switch (target) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h 
b/src/gallium/auxiliary/tgsi/tgsi_util.h
index 686b90f467e..6dc576b1a00 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -28,6 +28,7 @@
 #ifndef TGSI_UTIL_H
 #define TGSI_UTIL_H
 
+#include <stdbool.h>
 #include "pipe/p_shader_tokens.h"
 
 #if defined __cplusplus
@@ -84,11 +85,11 @@ tgsi_util_get_texture_coord_dim(enum tgsi_texture_type 
tgsi_tex);
 int
 tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type tgsi_tex);
 
-boolean
+bool
 tgsi_is_shadow_target(enum tgsi_texture_type target);
 
 
-static inline boolean
+static inline bool
 tgsi_is_msaa_target(enum tgsi_texture_type target)
 {
return (target == TGSI_TEXTURE_2D_MSAA ||
-- 
2.21.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/7] util/blob: Add overwrite function for uint8

2020-02-21 Thread Mark Menzynski
An overwrite function for this type was missing and I needed it for my project.

Signed-off-by: Mark Menzynski 
---
 src/util/blob.c |  9 +
 src/util/blob.h | 15 +++
 2 files changed, 24 insertions(+)

diff --git a/src/util/blob.c b/src/util/blob.c
index 94d5a9dea74..5bf4b924c91 100644
--- a/src/util/blob.c
+++ b/src/util/blob.c
@@ -214,6 +214,15 @@ BLOB_WRITE_TYPE(blob_write_intptr, intptr_t)
 #define ASSERT_ALIGNED(_offset, _align) \
assert(ALIGN((_offset), (_align)) == (_offset))
 
+bool
+blob_overwrite_uint8 (struct blob *blob,
+  size_t offset,
+  uint8_t value)
+{
+   ASSERT_ALIGNED(offset, sizeof(value));
+   return blob_overwrite_bytes(blob, offset, &value, sizeof(value));
+}
+
 bool
 blob_overwrite_uint32 (struct blob *blob,
size_t offset,
diff --git a/src/util/blob.h b/src/util/blob.h
index 9113331254a..e1e156eb43f 100644
--- a/src/util/blob.h
+++ b/src/util/blob.h
@@ -183,6 +183,21 @@ blob_overwrite_bytes(struct blob *blob,
 bool
 blob_write_uint8(struct blob *blob, uint8_t value);
 
+/**
+ * Overwrite a uint8_t previously written to the blob.
+ *
+ * Writes a uint8_t value to an existing portion of the blob at an offset of
+ * \offset.  This data range must have previously been written to the blob by
+ * one of the blob_write_* calls.
+ *
+ * \return True unless the requested position or position+to_write lie outside
+ * the current blob's size.
+ */
+bool
+blob_overwrite_uint8(struct blob *blob,
+ size_t offset,
+ uint8_t value);
+
 /**
  * Add a uint16_t to a blob.
  *
-- 
2.21.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/7] nv50/ir: Add prog_info_out print

2020-02-21 Thread Mark Menzynski
Adds a function for printing the nv50_ir_prog_info_out structure
in a JSON-like format, which can be used for debugging.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |   3 +
 .../drivers/nouveau/codegen/nv50_ir_print.cpp | 153 ++
 2 files changed, 156 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index eea32133ccf..10ae5cbe420 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -275,6 +275,9 @@ namespace nv50_ir
 }
 #endif
 
+extern void
+nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *);
+
 /* Serialize a nv50_ir_prog_info_out structure and save it into blob */
 extern bool
 nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 5dcbf3c3e0c..4877047c0ec 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -22,6 +22,7 @@
 
 #include "codegen/nv50_ir.h"
 #include "codegen/nv50_ir_target.h"
+#include "codegen/nv50_ir_driver.h"
 
 #include 
 
@@ -852,3 +853,155 @@ Function::printLiveIntervals() const
 }
 
 } // namespace nv50_ir
+
+extern void
+nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *info_out)
+{
+   int i;
+
+   INFO("{\n");
+   INFO("   \"target\":\"%d\",\n", info_out->target);
+   INFO("   \"type\":\"%d\",\n", info_out->type);
+
+   // Bin
+   INFO("   \"bin\":{\n");
+   INFO("  \"maxGPR\":\"%d\",\n", info_out->bin.maxGPR);
+   INFO("  \"tlsSpace\":\"%d\",\n", info_out->bin.tlsSpace);
+   INFO("  \"smemSize\":\"%d\",\n", info_out->bin.smemSize);
+   INFO("  \"codeSize\":\"%d\",\n", info_out->bin.codeSize);
+   INFO("  \"instructions\":\"%d\",\n", info_out->bin.instructions);
+
+   // RelocInfo
+   INFO("  \"RelocInfo\":");
+   if (!info_out->bin.relocData) {
+  INFO("\"NULL\",\n");
+   } else {
+  nv50_ir::RelocInfo *reloc = (nv50_ir::RelocInfo 
*)info_out->bin.relocData;
+  INFO("{\n");
+  INFO(" \"codePos\":\"%d\",\n", reloc->codePos);
+  INFO(" \"libPos\":\"%d\",\n", reloc->libPos);
+  INFO(" \"dataPos\":\"%d\",\n", reloc->dataPos);
+  INFO(" \"count\":\"%d\",\n", reloc->count);
+  INFO(" \"RelocEntry\":[\n");
+  for (unsigned int i = 0; i < reloc->count; i++) {
+ INFO("
{\"data\":\"%d\",\t\"mask\":\"%d\",\t\"offset\":\"%d\",\t\"bitPos\":\"%d\",\t\"type\":\"%d\"}",
+   reloc->entry[i].data, reloc->entry[i].mask, 
reloc->entry[i].offset, reloc->entry[i].bitPos, reloc->entry[i].type
+   );
+  }
+  INFO("\n");
+  INFO(" ]\n");
+  INFO("  },\n");
+   }
+
+   // FixupInfo
+   INFO("  \"FixupInfo\":");
+   if (!info_out->bin.fixupData) {
+  INFO("\"NULL\"\n");
+   } else {
+  nv50_ir::FixupInfo *fixup = (nv50_ir::FixupInfo 
*)info_out->bin.fixupData;
+  INFO("{\n");
+  INFO(" \"count\":\"%d\"\n", fixup->count);
+  INFO(" \"FixupEntry\":[\n");
+  for (unsigned int i = 0; i < fixup->count; i++) {
+ INFO("
{\"apply\":\"%p\",\t\"ipa\":\"%d\",\t\"reg\":\"%d\",\t\"loc\":\"%d\"}",
+   fixup->entry[i].apply, fixup->entry[i].ipa, 
fixup->entry[i].reg, fixup->entry[i].loc);
+  }
+  INFO("\n");
+  INFO(" ]\n");
+  INFO("  }\n");
+
+  INFO("   },\n");
+   }
+
+   if (info_out->numSysVals) {
+  INFO("   \"sv\":[\n");
+  for (i = 0; i < info_out->numSysVals; i++) {
+ if (&(info_out->sv[i])) {
+INFO("  {\"id\":\"%d\", \"sn\":\"%d\", \"si\":\"%d\"}",
+   info_out->sv[i].id, info_out->sv[i].sn, info_out->sv

[Mesa-dev] [PATCH v2 7/7] nvc0: Add shader disk caching

2020-02-21 Thread Mark Menzynski
Adds shader disk caching for nvc0 so that shaders do not have to be recompiled
every time. Shaders are saved into the disk_shader_cache of the nvc0_screen
structure.

It serializes the input nv50_ir_prog_info to compute the hash key and
also to do a byte compare between the original nv50_ir_prog_info and the one
saved in the cache. If the keys match and the byte compare also reports that
they are equal, the shaders are the same, and the compiled
nv50_ir_prog_info_out from the cache can be used instead of compiling the
input info.

This seems to improve loading times significantly. These are the results
from running a bunch of shaders:

cache off
real    2m58.574s
user    21m34.018s
sys     0m8.055s

cache on, first run
real    3m32.617s
user    24m52.701s
sys     0m20.400s

cache on, second run
real    0m23.745s
user    2m43.566s
sys     0m4.532s

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/nvc0/nvc0_context.h   |  1 +
 .../drivers/nouveau/nvc0/nvc0_program.c   | 48 ---
 .../drivers/nouveau/nvc0/nvc0_shader_state.c  |  3 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c |  2 +
 4 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 8a2a8f2797e..4b83d1afeb4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -321,6 +321,7 @@ extern struct draw_stage *nvc0_draw_render_stage(struct 
nvc0_context *);
 
 /* nvc0_program.c */
 bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset,
+struct disk_cache *,
 struct pipe_debug_callback *);
 bool nvc0_program_upload(struct nvc0_context *, struct nvc0_program *);
 void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 1a5073292e8..912d2dece4f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -24,6 +24,7 @@
 
 #include "compiler/nir/nir.h"
 #include "tgsi/tgsi_ureg.h"
+#include "util/blob.h"
 
 #include "nvc0/nvc0_context.h"
 
@@ -568,11 +569,19 @@ nvc0_program_dump(struct nvc0_program *prog)
 
 bool
 nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
+   struct disk_cache *disk_shader_cache,
struct pipe_debug_callback *debug)
 {
+   struct blob blob;
struct nv50_ir_prog_info *info;
struct nv50_ir_prog_info_out info_out = {};
-   int ret;
+
+   void *cached_data = NULL;
+   size_t cached_size;
+   bool shader_found = false;
+
+   int ret = 0;
+   cache_key key;
 
info = CALLOC_STRUCT(nv50_ir_prog_info);
if (!info)
@@ -631,14 +640,39 @@ nvc0_program_translate(struct nvc0_program *prog, 
uint16_t chipset,
info->assignSlots = nvc0_program_assign_varying_slots;
 
/* these fields might be overwritten by the compiler */
-   info_out.bin.smemSize = prog->cp.smem_size;
-   info_out.io.genUserClip = prog->vp.num_ucps;
+   info->bin.smemSize = prog->cp.smem_size;
+   info->io.genUserClip = prog->vp.num_ucps;
+
+   blob_init(&blob);
+
+   if (disk_shader_cache) {
+  nv50_ir_prog_info_serialize(&blob, info);
+  disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key);
+  cached_data = disk_cache_get(disk_shader_cache, key, &cached_size);
+
+  if (cached_data && cached_size >= blob.size) { // blob.size is the size 
of serialized "info"
+ if (memcmp(cached_data, blob.data, blob.size) == 0) {
+shader_found = true;
+/* Blob contains only "info". In disk cache, "info_out" comes 
right after it */
+size_t offset = blob.size;
+nv50_ir_prog_info_out_deserialize(cached_data, cached_size, 
offset, &info_out);
+ }
+  }
+  free(cached_data);
+   }
+   if (!shader_found) {
 
-   ret = nv50_ir_generate_code(info, &info_out);
-   if (ret) {
-  NOUVEAU_ERR("shader translation failed: %i\n", ret);
-  goto out;
+  ret = nv50_ir_generate_code(info, &info_out);
+  if (ret) {
+ NOUVEAU_ERR("shader translation failed: %i\n", ret);
+ goto out;
+  }
+  if (disk_shader_cache) {
+ nv50_ir_prog_info_out_serialize(&blob, &info_out);
+ disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL);
+  }
}
+   blob_finish(&blob);
 
prog->code = info_out.bin.code;
prog->code_size = info_out.bin.codeSize;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 774c5648113..4327a89454b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -54,7 +54,8 @@ nvc0_program_validate(struct

[Mesa-dev] [PATCH v2 5/7] nv50/ir: Add nv50_ir_prog_info serialize

2020-02-21 Thread Mark Menzynski
Adds a function for serializing a nv50_ir_prog_info structure, which is
needed for shader caching.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |  4 +
 .../nouveau/codegen/nv50_ir_serialize.cpp | 81 +++
 2 files changed, 85 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 10ae5cbe420..3728470ab45 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -278,6 +278,10 @@ namespace nv50_ir
 extern void
 nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *);
 
+/* Serialize a nv50_ir_prog_info structure and save it into blob */
+extern bool
+nv50_ir_prog_info_serialize(struct blob *, struct nv50_ir_prog_info *);
+
 /* Serialize a nv50_ir_prog_info_out structure and save it into blob */
 extern bool
 nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
index 5671483bd4e..b640cb67503 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
@@ -17,6 +17,87 @@ enum FixupApplyFunc {
FLIP_GM107 = 7
 };
 
+extern bool
+nv50_ir_prog_info_serialize(struct blob *blob, struct nv50_ir_prog_info *info)
+{
+   blob_write_uint16(blob, info->target);
+   blob_write_uint8(blob, info->type);
+   blob_write_uint8(blob, info->optLevel);
+   blob_write_uint8(blob, info->dbgFlags);
+   blob_write_uint8(blob, info->omitLineNum);
+   blob_write_uint32(blob, info->bin.smemSize);
+   blob_write_uint16(blob, info->bin.maxOutput);
+   blob_write_uint8(blob, info->bin.sourceRep);
+
+   switch(info->bin.sourceRep) {
+  case PIPE_SHADER_IR_TGSI: {
+ struct tgsi_token *tokens = (struct tgsi_token *)info->bin.source;
+ unsigned int num_tokens = tgsi_num_tokens(tokens);
+
+ blob_write_uint32(blob, num_tokens);
+ blob_write_bytes(blob, tokens, num_tokens * sizeof(struct 
tgsi_token));
+ break;
+  }
+  case PIPE_SHADER_IR_NIR: {
+ struct nir_shader *nir = (struct nir_shader *)info->bin.source;
+ nir_serialize(blob, nir, false);
+ break;
+  }
+  default:
+ assert(!"unhandled info->bin.sourceRep");
+ return false;
+   }
+
+   blob_write_uint16(blob, info->immd.bufSize);
+   blob_write_bytes(blob, info->immd.buf, info->immd.bufSize * 
sizeof(*info->immd.buf));
+   blob_write_uint16(blob, info->immd.count);
+   blob_write_bytes(blob, info->immd.data, info->immd.count * 
sizeof(*info->immd.data));
+   blob_write_bytes(blob, info->immd.type, info->immd.count * 16); // for each 
vec4 (128 bit)
+
+   switch (info->type) {
+  case PIPE_SHADER_VERTEX:
+ blob_write_bytes(blob, info->prop.vp.inputMask,
+  4 * sizeof(*info->prop.vp.inputMask)); /* array of 
size 4 */
+ break;
+  case PIPE_SHADER_TESS_CTRL:
+ blob_write_uint32(blob, info->prop.cp.inputOffset);
+ blob_write_uint32(blob, info->prop.cp.sharedOffset);
+ blob_write_uint32(blob, info->prop.cp.gridInfoBase);
+ blob_write_bytes(blob, info->prop.cp.numThreads,
+  3 * sizeof(*info->prop.cp.numThreads)); /* array of 
size 3 */
+  case PIPE_SHADER_GEOMETRY:
+ blob_write_uint8(blob, info->prop.gp.inputPrim);
+ break;
+  case PIPE_SHADER_FRAGMENT:
+ blob_write_uint8(blob, info->prop.fp.persampleInvocation);
+ break;
+  default:
+ break;
+   }
+
+   blob_write_uint8(blob, info->io.auxCBSlot);
+   blob_write_uint16(blob, info->io.ucpBase);
+   blob_write_uint16(blob, info->io.drawInfoBase);
+   blob_write_uint16(blob, info->io.alphaRefBase);
+   blob_write_uint8(blob, info->io.pointSize);
+   blob_write_uint8(blob, info->io.viewportId);
+   blob_write_bytes(blob, info->io.backFaceColor, 2 * 
sizeof(*info->io.backFaceColor));
+   blob_write_uint8(blob, info->io.mul_zero_wins);
+   blob_write_uint8(blob, info->io.nv50styleSurfaces);
+   blob_write_uint16(blob, info->io.texBindBase);
+   blob_write_uint16(blob, info->io.fbtexBindBase);
+   blob_write_uint16(blob, info->io.suInfoBase);
+   blob_write_uint16(blob, info->io.bindlessBase);
+   blob_write_uint16(blob, info->io.bufInfoBase);
+   blob_write_uint16(blob, info->io.sampleInfoBase);
+   blob_write_uint8(blob, info->io.msInfoCBSlot);
+   blob_write_uint16(blob, info->io.msInfoBase);
+   blob_write_uint16(blob, info->io.uboInfoBase);
+   blob_write_uint8(blob, info->io.genUserClip);
+
+   return true;
+}
+
 extern bool
 nv50_ir_prog_info_out_serialize(struct blob *blo

[Mesa-dev] [PATCH v2 1/7] nv50/ir: add nv50_ir_prog_info_out

2020-02-21 Thread Mark Menzynski
From: Karol Herbst 

Split out the output-relevant fields from the nv50_ir_prog_info struct
in order to have a cleaner separation between the input and output of
the compilation.

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir.cpp   |  49 ++--
 src/gallium/drivers/nouveau/codegen/nv50_ir.h |   9 +-
 .../drivers/nouveau/codegen/nv50_ir_driver.h  | 117 +---
 .../nouveau/codegen/nv50_ir_from_common.cpp   |  14 +-
 .../nouveau/codegen/nv50_ir_from_common.h |   3 +-
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 204 +++---
 .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 256 +-
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp |   6 +-
 .../nouveau/codegen/nv50_ir_target.cpp|   2 +-
 .../drivers/nouveau/codegen/nv50_ir_target.h  |   5 +-
 .../nouveau/codegen/nv50_ir_target_nv50.cpp   |  17 +-
 .../nouveau/codegen/nv50_ir_target_nv50.h |   3 +-
 .../drivers/nouveau/nouveau_compiler.c|   9 +-
 .../drivers/nouveau/nv50/nv50_program.c   |  61 +++--
 .../drivers/nouveau/nvc0/nvc0_program.c   |  89 +++---
 15 files changed, 450 insertions(+), 394 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index c65853578f6..c2c5956874a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1241,15 +1241,18 @@ void Program::releaseValue(Value *value)
 extern "C" {
 
 static void
-nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
+nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,
+   struct nv50_ir_prog_info_out *info_out)
 {
+   info_out->target = info->target;
+   info_out->type = info->type;
if (info->type == PIPE_SHADER_TESS_CTRL || info->type == 
PIPE_SHADER_TESS_EVAL) {
-  info->prop.tp.domain = PIPE_PRIM_MAX;
-  info->prop.tp.outputPrim = PIPE_PRIM_MAX;
+  info_out->prop.tp.domain = PIPE_PRIM_MAX;
+  info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;
}
if (info->type == PIPE_SHADER_GEOMETRY) {
-  info->prop.gp.instanceCount = 1;
-  info->prop.gp.maxVertices = 1;
+  info_out->prop.gp.instanceCount = 1;
+  info_out->prop.gp.maxVertices = 1;
}
if (info->type == PIPE_SHADER_COMPUTE) {
   info->prop.cp.numThreads[0] =
@@ -1257,23 +1260,26 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
   info->prop.cp.numThreads[2] = 1;
}
info->io.pointSize = 0xff;
-   info->io.instanceId = 0xff;
-   info->io.vertexId = 0xff;
-   info->io.edgeFlagIn = 0xff;
-   info->io.edgeFlagOut = 0xff;
-   info->io.fragDepth = 0xff;
-   info->io.sampleMask = 0xff;
+   info_out->bin.smemSize = info->bin.smemSize;
+   info_out->io.genUserClip = info->io.genUserClip;
+   info_out->io.instanceId = 0xff;
+   info_out->io.vertexId = 0xff;
+   info_out->io.edgeFlagIn = 0xff;
+   info_out->io.edgeFlagOut = 0xff;
+   info_out->io.fragDepth = 0xff;
+   info_out->io.sampleMask = 0xff;
info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
 }
 
 int
-nv50_ir_generate_code(struct nv50_ir_prog_info *info)
+nv50_ir_generate_code(struct nv50_ir_prog_info *info,
+  struct nv50_ir_prog_info_out *info_out)
 {
int ret = 0;
 
nv50_ir::Program::Type type;
 
-   nv50_ir_init_prog_info(info);
+   nv50_ir_init_prog_info(info, info_out);
 
 #define PROG_TYPE_CASE(a, b)  \
case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
@@ -1301,15 +1307,16 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
   return -1;
}
prog->driver = info;
+   prog->driver_out = info_out;
prog->dbgFlags = info->dbgFlags;
prog->optLevel = info->optLevel;
 
switch (info->bin.sourceRep) {
case PIPE_SHADER_IR_NIR:
-  ret = prog->makeFromNIR(info) ? 0 : -2;
+  ret = prog->makeFromNIR(info, info_out) ? 0 : -2;
   break;
case PIPE_SHADER_IR_TGSI:
-  ret = prog->makeFromTGSI(info) ? 0 : -2;
+  ret = prog->makeFromTGSI(info, info_out) ? 0 : -2;
   break;
default:
   ret = -1;
@@ -1320,7 +1327,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
   prog->print();
 
-   targ->parseDriverInfo(info);
+   targ->parseDriverInfo(info, info_out);
prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
 
prog->convertToSSA();
@@ -1342,7 +1349,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
 
prog->optimizePostRA(info->optLevel);
 
-   if (!prog->emitBinary(info)) {
+   if (!prog->emitBinary(info_out)) {
   ret = -5;
   goto out;
}
@@ -1350,10 +1357,10 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
 out:
INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
 
-   info->bin.maxGPR = prog->maxGPR;
-   info->bin.code = prog->code;
-   info->bin.codeSize = prog->binSize;
-   info->bin.tlsSpace = 

[Mesa-dev] [PATCH v2 3/7] nv50/ir: Add nv50_ir_prog_info_out serialize and deserialize

2020-02-21 Thread Mark Menzynski
Adds functions for serializing and deserializing the
nv50_ir_prog_info_out structure, which are needed for shader caching.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |  44 
 .../nouveau/codegen/nv50_ir_emit_gk110.cpp|  14 +-
 .../nouveau/codegen/nv50_ir_emit_gm107.cpp|  14 +-
 .../nouveau/codegen/nv50_ir_emit_nv50.cpp |   6 +-
 .../nouveau/codegen/nv50_ir_emit_nvc0.cpp |  14 +-
 .../nouveau/codegen/nv50_ir_serialize.cpp | 196 ++
 src/gallium/drivers/nouveau/meson.build   |   1 +
 7 files changed, 265 insertions(+), 24 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index dab1ce030cb..eea32133ccf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -25,6 +25,7 @@
 
 #include "pipe/p_shader_tokens.h"
 
+#include "util/blob.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
@@ -242,6 +243,49 @@ nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
 extern void nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size);
 
+
+#ifdef __cplusplus
+namespace nv50_ir
+{
+   class FixupEntry;
+   class FixupData;
+
+   void
+   gk110_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+ const nv50_ir::FixupData& data);
+   void
+   gm107_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+ const nv50_ir::FixupData& data);
+   void
+   nv50_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+const nv50_ir::FixupData& data);
+   void
+   nvc0_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+const nv50_ir::FixupData& data);
+   void
+   gk110_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
+  const nv50_ir::FixupData& data);
+   void
+   gm107_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
+  const nv50_ir::FixupData& data);
+   void
+   nvc0_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
+ const nv50_ir::FixupData& data);
+
+}
+#endif
+
+/* Serialize a nv50_ir_prog_info_out structure and save it into blob */
+extern bool
+nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *);
+
+/* Deserialize from data and save into a nv50_ir_prog_info_out structure
+ * using a pointer. Size is a total size of the serialized data.
+ * Offset points to where info_out in data is located. */
+extern bool
+nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset,
+  struct nv50_ir_prog_info_out *);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 2118c3153f7..e651d7fdcb0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1209,8 +1209,8 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
}
 }
 
-static void
-selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+void
+gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
 {
int loc = entry->loc;
if (data.force_persample_interp)
@@ -1227,7 +1227,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i)
   code[1] |= 1 << 13;
 
if (i->subOp == 1) {
-  addInterp(0, 0, selpFlip);
+  addInterp(0, 0, gk110_selpFlip);
}
 }
 
@@ -2042,8 +2042,8 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i)
code[1] |= (i->ipa & 0xc) << (19 - 2);
 }
 
-static void
-interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+void
+gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const 
FixupData& data)
 {
int ipa = entry->ipa;
int reg = entry->reg;
@@ -2078,10 +2078,10 @@ CodeEmitterGK110::emitINTERP(const Instruction *i)
 
if (i->op == OP_PINTERP) {
   srcId(i->src(1), 23);
-  addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
+  addInterp(i->ipa, SDATA(i->src(1)).id, gk110_interpApply);
} else {
   code[0] |= 0xff << 23;
-  addInterp(i->ipa, 0xff, interpApply);
+  addInterp(i->ipa, 0xff, gk110_interpApply);
}
 
srcId(i->src(0).getIndirect(0), 10);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index e244bd0d610..4970f14cb33 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_e

[Mesa-dev] [PATCH 4/8] nv50/ir: Add prog_info_out print

2020-02-17 Thread Mark Menzynski
Adds a function for printing the nv50_ir_prog_info_out structure
in a JSON-like format, which can be used for debugging.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |   3 +
 .../drivers/nouveau/codegen/nv50_ir_print.cpp | 155 ++
 2 files changed, 158 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index bc92a3bc4ee..9eb8a4c4798 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -275,6 +275,9 @@ namespace nv50_ir
 }
 #endif
 
+extern void
+nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *);
+
 /* Serialize a nv50_ir_prog_info_out structure and save it into blob */
 extern bool
 nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 5dcbf3c3e0c..f19d1a7d280 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -22,6 +22,7 @@
 
 #include "codegen/nv50_ir.h"
 #include "codegen/nv50_ir_target.h"
+#include "codegen/nv50_ir_driver.h"
 
 #include 
 
@@ -852,3 +853,157 @@ Function::printLiveIntervals() const
 }
 
 } // namespace nv50_ir
+
+extern void
+nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *info_out)
+{
+   int i;
+
+   INFO("{\n");
+   INFO("   \"target\":\"%d\",\n", info_out->target);
+   INFO("   \"type\":\"%d\",\n", info_out->type);
+
+   // Bin
+   INFO("   \"bin\":{\n");
+   INFO("  \"maxGPR\":\"%d\",\n", info_out->bin.maxGPR);
+   INFO("  \"tlsSpace\":\"%d\",\n", info_out->bin.tlsSpace);
+   INFO("  \"smemSize\":\"%d\",\n", info_out->bin.smemSize);
+   INFO("  \"codeSize\":\"%d\",\n", info_out->bin.codeSize);
+   INFO("  \"instructions\":\"%d\",\n", info_out->bin.instructions);
+
+   // RelocInfo
+   INFO("  \"RelocInfo\":");
+   if (!info_out->bin.relocData) {
+  INFO("\"NULL\",\n");
+   }
+   else {
+  nv50_ir::RelocInfo *reloc = (nv50_ir::RelocInfo 
*)info_out->bin.relocData;
+  INFO("{\n");
+  INFO(" \"codePos\":\"%d\",\n", reloc->codePos);
+  INFO(" \"libPos\":\"%d\",\n", reloc->libPos);
+  INFO(" \"dataPos\":\"%d\",\n", reloc->dataPos);
+  INFO(" \"count\":\"%d\",\n", reloc->count);
+  INFO(" \"RelocEntry\":[\n");
+  for (unsigned int i = 0; i < reloc->count; i++) {
+ INFO("
{\"data\":\"%d\",\t\"mask\":\"%d\",\t\"offset\":\"%d\",\t\"bitPos\":\"%d\",\t\"type\":\"%d\"}",
+   reloc->entry[i].data, reloc->entry[i].mask, 
reloc->entry[i].offset, reloc->entry[i].bitPos, reloc->entry[i].type
+   );
+  }
+  INFO("\n");
+  INFO(" ]\n");
+  INFO("  },\n");
+   }
+
+   // FixupInfo
+   INFO("  \"FixupInfo\":");
+   if (!info_out->bin.fixupData) {
+  INFO("\"NULL\"\n");
+   }
+   else {
+  nv50_ir::FixupInfo *fixup = (nv50_ir::FixupInfo 
*)info_out->bin.fixupData;
+  INFO("{\n");
+  INFO(" \"count\":\"%d\"\n", fixup->count);
+  INFO(" \"FixupEntry\":[\n");
+  for (unsigned int i = 0; i < fixup->count; i++) {
+ INFO("
{\"apply\":\"%p\",\t\"ipa\":\"%d\",\t\"reg\":\"%d\",\t\"loc\":\"%d\"}",
+   fixup->entry[i].apply, fixup->entry[i].ipa, 
fixup->entry[i].reg, fixup->entry[i].loc);
+  }
+  INFO("\n");
+  INFO(" ]\n");
+  INFO("  }\n");
+
+  INFO("   },\n");
+   }
+
+   if (info_out->numSysVals) {
+  INFO("   \"sv\":[\n");
+  for (i = 0; i < info_out->numSysVals; i++) {
+ if (&(info_out->sv[i])) {
+INFO("  {\"id\":\"%d\", \"sn\":\"%d\", \"si\":\"%d\"}",
+   info_out->sv[i].id, info_out->sv[i].sn, info

[Mesa-dev] [PATCH 8/8] nvc0: Add shader disk caching

2020-02-17 Thread Mark Menzynski
Adds shader disk caching for nvc0 so that shaders do not have to be compiled
every time. Shaders are saved into disk_shader_cache from the nvc0_screen
structure.

It serializes the input nv50_ir_prog_info to compute the hash key and
also to do a byte compare between the original nv50_ir_prog_info and the one
saved in the cache. If the keys match and the byte compare also reports that
they are equal, the shaders are the same, and the compiled
nv50_ir_prog_info_out from the cache can be used instead of compiling the
input info.

Seems to be significantly improving loading times. Piglit tests seem
to be OK.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/nvc0/nvc0_context.h   |  1 +
 .../drivers/nouveau/nvc0/nvc0_program.c   | 49 ---
 .../drivers/nouveau/nvc0/nvc0_shader_state.c  |  3 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c |  2 +
 4 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 8a2a8f2797e..4b83d1afeb4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -321,6 +321,7 @@ extern struct draw_stage *nvc0_draw_render_stage(struct 
nvc0_context *);
 
 /* nvc0_program.c */
 bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset,
+struct disk_cache *,
 struct pipe_debug_callback *);
 bool nvc0_program_upload(struct nvc0_context *, struct nvc0_program *);
 void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 1a5073292e8..06b6f7b4db5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -24,6 +24,7 @@
 
 #include "compiler/nir/nir.h"
 #include "tgsi/tgsi_ureg.h"
+#include "util/blob.h"
 
 #include "nvc0/nvc0_context.h"
 
@@ -568,11 +569,19 @@ nvc0_program_dump(struct nvc0_program *prog)
 
 bool
 nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
+   struct disk_cache *disk_shader_cache,
struct pipe_debug_callback *debug)
 {
+   struct blob blob;
struct nv50_ir_prog_info *info;
struct nv50_ir_prog_info_out info_out = {};
-   int ret;
+
+   void *cached_data = NULL;
+   size_t cached_size;
+   bool shader_found = false;
+
+   int ret = 0;
+   cache_key key;
 
info = CALLOC_STRUCT(nv50_ir_prog_info);
if (!info)
@@ -631,14 +640,38 @@ nvc0_program_translate(struct nvc0_program *prog, 
uint16_t chipset,
info->assignSlots = nvc0_program_assign_varying_slots;
 
/* these fields might be overwritten by the compiler */
-   info_out.bin.smemSize = prog->cp.smem_size;
-   info_out.io.genUserClip = prog->vp.num_ucps;
-
-   ret = nv50_ir_generate_code(info, _out);
-   if (ret) {
-  NOUVEAU_ERR("shader translation failed: %i\n", ret);
-  goto out;
+   info->bin.smemSize = prog->cp.smem_size;
+   info->io.genUserClip = prog->vp.num_ucps;
+
+   blob_init();
+   nv50_ir_prog_info_serialize(, info);
+
+   if (disk_shader_cache) {
+  disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key);
+  cached_data = disk_cache_get(disk_shader_cache, key, _size);
+
+  if (cached_data && cached_size >= blob.size) { // blob.size is the size 
of serialized "info"
+ if (memcmp(cached_data, blob.data, blob.size) == 0) {
+shader_found = true;
+/* Blob contains only "info". In disk cache, "info_out" comes 
right after it */
+size_t offset = blob.size;
+nv50_ir_prog_info_out_deserialize(cached_data, cached_size, 
offset, _out);
+ }
+  }
+  free(cached_data);
+   }
+   if (!shader_found) {
+  ret = nv50_ir_generate_code(info, _out);
+  if (ret) {
+ NOUVEAU_ERR("shader translation failed: %i\n", ret);
+ goto out;
+  }
+  if (disk_shader_cache) {
+ nv50_ir_prog_info_out_serialize(, _out);
+ disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL);
+  }
}
+   blob_finish();
 
prog->code = info_out.bin.code;
prog->code_size = info_out.bin.codeSize;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 774c5648113..4327a89454b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -54,7 +54,8 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct 
nvc0_program *prog)
 
if (!prog->translated) {
   prog->translated = nvc0_program_translate(
- prog, nvc0->screen->base.device->chipset, >base.debug);
+ prog, nvc0->screen->ba

[Mesa-dev] [PATCH 3/8] nv50/ir: Add nv50_ir_prog_info_out serialize and deserialize

2020-02-17 Thread Mark Menzynski
Adds functions for serializing and deserializing
nv50_ir_prog_info_out structure, which are needed for shader caching.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |  44 
 .../nouveau/codegen/nv50_ir_emit_gk110.cpp|  14 +-
 .../nouveau/codegen/nv50_ir_emit_gm107.cpp|  14 +-
 .../nouveau/codegen/nv50_ir_emit_nv50.cpp |   6 +-
 .../nouveau/codegen/nv50_ir_emit_nvc0.cpp |  14 +-
 .../nouveau/codegen/nv50_ir_serialize.cpp | 196 ++
 src/gallium/drivers/nouveau/meson.build   |   1 +
 7 files changed, 265 insertions(+), 24 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index f6b5415bc95..bc92a3bc4ee 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -25,6 +25,7 @@
 
 #include "pipe/p_shader_tokens.h"
 
+#include "util/blob.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
@@ -242,6 +243,49 @@ nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
 extern void nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size);
 
+
+#ifdef __cplusplus
+namespace nv50_ir
+{
+   class FixupEntry;
+   class FixupData;
+
+   void
+   gk110_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+ const nv50_ir::FixupData& data);
+   void
+   gm107_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+ const nv50_ir::FixupData& data);
+   void
+   nv50_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+const nv50_ir::FixupData& data);
+   void
+   nvc0_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
+const nv50_ir::FixupData& data);
+   void
+   gk110_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
+  const nv50_ir::FixupData& data);
+   void
+   gm107_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
+  const nv50_ir::FixupData& data);
+   void
+   nvc0_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
+ const nv50_ir::FixupData& data);
+
+}
+#endif
+
+/* Serialize a nv50_ir_prog_info_out structure and save it into blob */
+extern bool
+nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *);
+
+/* Deserialize from data and save into a nv50_ir_prog_info_out structure
+ * using a pointer. Size is a total size of the serialized data.
+ * Offset points to where info_out in data is located. */
+extern bool
+nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset,
+ struct nv50_ir_prog_info_out *);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 2118c3153f7..e651d7fdcb0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1209,8 +1209,8 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
}
 }
 
-static void
-selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+void
+gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
 {
int loc = entry->loc;
if (data.force_persample_interp)
@@ -1227,7 +1227,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i)
   code[1] |= 1 << 13;
 
if (i->subOp == 1) {
-  addInterp(0, 0, selpFlip);
+  addInterp(0, 0, gk110_selpFlip);
}
 }
 
@@ -2042,8 +2042,8 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i)
code[1] |= (i->ipa & 0xc) << (19 - 2);
 }
 
-static void
-interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+void
+gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const 
FixupData& data)
 {
int ipa = entry->ipa;
int reg = entry->reg;
@@ -2078,10 +2078,10 @@ CodeEmitterGK110::emitINTERP(const Instruction *i)
 
if (i->op == OP_PINTERP) {
   srcId(i->src(1), 23);
-  addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
+  addInterp(i->ipa, SDATA(i->src(1)).id, gk110_interpApply);
} else {
   code[0] |= 0xff << 23;
-  addInterp(i->ipa, 0xff, interpApply);
+  addInterp(i->ipa, 0xff, gk110_interpApply);
}
 
srcId(i->src(0).getIndirect(0), 10);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index e244bd0d610..4970f14cb33 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
++

[Mesa-dev] [PATCH 2/8] util/blob: Add overwrite function for uint8

2020-02-17 Thread Mark Menzynski
The overwrite function for this type was missing, and I needed it for my project.

Signed-off-by: Mark Menzynski 
---
 src/util/blob.c |  9 +
 src/util/blob.h | 15 +++
 2 files changed, 24 insertions(+)

diff --git a/src/util/blob.c b/src/util/blob.c
index 94d5a9dea74..5bf4b924c91 100644
--- a/src/util/blob.c
+++ b/src/util/blob.c
@@ -214,6 +214,15 @@ BLOB_WRITE_TYPE(blob_write_intptr, intptr_t)
 #define ASSERT_ALIGNED(_offset, _align) \
assert(ALIGN((_offset), (_align)) == (_offset))
 
+bool
+blob_overwrite_uint8 (struct blob *blob,
+  size_t offset,
+  uint8_t value)
+{
+   ASSERT_ALIGNED(offset, sizeof(value));
+   return blob_overwrite_bytes(blob, offset, , sizeof(value));
+}
+
 bool
 blob_overwrite_uint32 (struct blob *blob,
size_t offset,
diff --git a/src/util/blob.h b/src/util/blob.h
index 9113331254a..d5496fef1cd 100644
--- a/src/util/blob.h
+++ b/src/util/blob.h
@@ -209,6 +209,21 @@ blob_write_uint16(struct blob *blob, uint16_t value);
 bool
 blob_write_uint32(struct blob *blob, uint32_t value);
 
+/**
+ * Overwrite a uint8_t previously written to the blob.
+ *
+ * Writes a uint8_t value to an existing portion of the blob at an offset of
+ * \offset.  This data range must have previously been written to the blob by
+ * one of the blob_write_* calls.
+ *
+ * \return True unless the requested position or position+to_write lie outside
+ * the current blob's size.
+ */
+bool
+blob_overwrite_uint8(struct blob *blob,
+ size_t offset,
+ uint8_t value);
+
 /**
  * Overwrite a uint32_t previously written to the blob.
  *
-- 
2.21.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/8] nv50/ir: add nv50_ir_prog_info_out

2020-02-17 Thread Mark Menzynski
From: Karol Herbst 

Split out the output relevant fields from the nv50_ir_prog_info struct
in order to have a cleaner separation between the input and output of
the compilation.

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir.cpp   |  49 ++--
 src/gallium/drivers/nouveau/codegen/nv50_ir.h |   9 +-
 .../drivers/nouveau/codegen/nv50_ir_driver.h  | 117 +---
 .../nouveau/codegen/nv50_ir_from_common.cpp   |  14 +-
 .../nouveau/codegen/nv50_ir_from_common.h |   3 +-
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 202 +++---
 .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 254 +-
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp |   6 +-
 .../nouveau/codegen/nv50_ir_target.cpp|   2 +-
 .../drivers/nouveau/codegen/nv50_ir_target.h  |   5 +-
 .../nouveau/codegen/nv50_ir_target_nv50.cpp   |  17 +-
 .../nouveau/codegen/nv50_ir_target_nv50.h |   3 +-
 .../drivers/nouveau/nouveau_compiler.c|   9 +-
 .../drivers/nouveau/nv50/nv50_program.c   |  61 ++---
 .../drivers/nouveau/nvc0/nvc0_program.c   |  89 +++---
 15 files changed, 448 insertions(+), 392 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index c65853578f6..c2c5956874a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1241,15 +1241,18 @@ void Program::releaseValue(Value *value)
 extern "C" {
 
 static void
-nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
+nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,
+   struct nv50_ir_prog_info_out *info_out)
 {
+   info_out->target = info->target;
+   info_out->type = info->type;
if (info->type == PIPE_SHADER_TESS_CTRL || info->type == 
PIPE_SHADER_TESS_EVAL) {
-  info->prop.tp.domain = PIPE_PRIM_MAX;
-  info->prop.tp.outputPrim = PIPE_PRIM_MAX;
+  info_out->prop.tp.domain = PIPE_PRIM_MAX;
+  info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;
}
if (info->type == PIPE_SHADER_GEOMETRY) {
-  info->prop.gp.instanceCount = 1;
-  info->prop.gp.maxVertices = 1;
+  info_out->prop.gp.instanceCount = 1;
+  info_out->prop.gp.maxVertices = 1;
}
if (info->type == PIPE_SHADER_COMPUTE) {
   info->prop.cp.numThreads[0] =
@@ -1257,23 +1260,26 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
   info->prop.cp.numThreads[2] = 1;
}
info->io.pointSize = 0xff;
-   info->io.instanceId = 0xff;
-   info->io.vertexId = 0xff;
-   info->io.edgeFlagIn = 0xff;
-   info->io.edgeFlagOut = 0xff;
-   info->io.fragDepth = 0xff;
-   info->io.sampleMask = 0xff;
+   info_out->bin.smemSize = info->bin.smemSize;
+   info_out->io.genUserClip = info->io.genUserClip;
+   info_out->io.instanceId = 0xff;
+   info_out->io.vertexId = 0xff;
+   info_out->io.edgeFlagIn = 0xff;
+   info_out->io.edgeFlagOut = 0xff;
+   info_out->io.fragDepth = 0xff;
+   info_out->io.sampleMask = 0xff;
info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
 }
 
 int
-nv50_ir_generate_code(struct nv50_ir_prog_info *info)
+nv50_ir_generate_code(struct nv50_ir_prog_info *info,
+  struct nv50_ir_prog_info_out *info_out)
 {
int ret = 0;
 
nv50_ir::Program::Type type;
 
-   nv50_ir_init_prog_info(info);
+   nv50_ir_init_prog_info(info, info_out);
 
 #define PROG_TYPE_CASE(a, b)  \
case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
@@ -1301,15 +1307,16 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
   return -1;
}
prog->driver = info;
+   prog->driver_out = info_out;
prog->dbgFlags = info->dbgFlags;
prog->optLevel = info->optLevel;
 
switch (info->bin.sourceRep) {
case PIPE_SHADER_IR_NIR:
-  ret = prog->makeFromNIR(info) ? 0 : -2;
+  ret = prog->makeFromNIR(info, info_out) ? 0 : -2;
   break;
case PIPE_SHADER_IR_TGSI:
-  ret = prog->makeFromTGSI(info) ? 0 : -2;
+  ret = prog->makeFromTGSI(info, info_out) ? 0 : -2;
   break;
default:
   ret = -1;
@@ -1320,7 +1327,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
   prog->print();
 
-   targ->parseDriverInfo(info);
+   targ->parseDriverInfo(info, info_out);
prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
 
prog->convertToSSA();
@@ -1342,7 +1349,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
 
prog->optimizePostRA(info->optLevel);
 
-   if (!prog->emitBinary(info)) {
+   if (!prog->emitBinary(info_out)) {
   ret = -5;
   goto out;
}
@@ -1350,10 +1357,10 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
 out:
INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
 
-   info->bin.maxGPR = prog->maxGPR;
-   info->bin.code = prog->code;
-   info->bin.codeSize = prog->binSize;
-   info->bin.tlsSpace = 

[Mesa-dev] [PATCH 7/8] nv50/ir: Move separateFragData

2020-02-17 Thread Mark Menzynski
separateFragData was in the wrong place (nv50_ir_prog_info, the input
structure); moved it to nv50_ir_prog_info_out.

Signed-off-by: Mark Menzynski 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h  | 2 +-
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp  | 2 +-
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index cdf19eeabcf..30498ceffaf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -112,7 +112,6 @@ struct nv50_ir_prog_info
  uint8_t inputPrim;
   } gp;
   struct {
- bool separateFragData;
  bool persampleInvocation;
   } fp;
   struct {
@@ -200,6 +199,7 @@ struct nv50_ir_prog_info_out
  bool usesSampleMaskIn;
  bool readsFramebuffer;
  bool readsSampleLocations;
+ bool separateFragData;
   } fp;
} prop;
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 3efeaab4569..cf5f3d6d7e7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -2100,7 +2100,7 @@ Converter::visit(nir_intrinsic_instr *insn)
   atom->setIndirect(0, 0, address);
   atom->subOp = getSubOp(op);
 
-  info->io.globalAccess |= 0x2;
+  info_out->io.globalAccess |= 0x2;
   break;
}
case nir_intrinsic_bindless_image_atomic_add:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 5850dc18fec..c2322f3856a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1176,7 +1176,7 @@ void Source::scanProperty(const struct tgsi_full_property 
*prop)
   info_out->prop.gp.instanceCount = prop->u[0].Data;
   break;
case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
-  info->prop.fp.separateFragData = true;
+  info_out->prop.fp.separateFragData = true;
   break;
case TGSI_PROPERTY_FS_COORD_ORIGIN:
case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
-- 
2.21.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/8] tgsi/util: Change boolean for bool

2020-02-17 Thread Mark Menzynski
I was getting errors with "boolean" when compiling. This patch changes
boolean to bool from <stdbool.h>.

Signed-off-by: Mark Menzynski 
---
 src/gallium/auxiliary/tgsi/tgsi_util.c | 2 +-
 src/gallium/auxiliary/tgsi/tgsi_util.h | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c 
b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 1e5582ba273..e1b604cff0e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -537,7 +537,7 @@ tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type 
tgsi_tex)
 }
 
 
-boolean
+bool
 tgsi_is_shadow_target(enum tgsi_texture_type target)
 {
switch (target) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h 
b/src/gallium/auxiliary/tgsi/tgsi_util.h
index 686b90f467e..6dc576b1a00 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -28,6 +28,7 @@
 #ifndef TGSI_UTIL_H
 #define TGSI_UTIL_H
 
+#include 
 #include "pipe/p_shader_tokens.h"
 
 #if defined __cplusplus
@@ -84,11 +85,11 @@ tgsi_util_get_texture_coord_dim(enum tgsi_texture_type 
tgsi_tex);
 int
 tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type tgsi_tex);
 
-boolean
+bool
 tgsi_is_shadow_target(enum tgsi_texture_type target);
 
 
-static inline boolean
+static inline bool
 tgsi_is_msaa_target(enum tgsi_texture_type target)
 {
return (target == TGSI_TEXTURE_2D_MSAA ||
-- 
2.21.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/8] nv50/ir: Add nv50_ir_prog_info serialize

2020-02-17 Thread Mark Menzynski
Adds a function for serializing a nv50_ir_prog_info structure, which is
needed for shader caching.

Signed-off-by: Mark Menzynski 
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |  4 +
 .../nouveau/codegen/nv50_ir_serialize.cpp | 81 +++
 2 files changed, 85 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 9eb8a4c4798..cdf19eeabcf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -278,6 +278,10 @@ namespace nv50_ir
 extern void
 nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *);
 
+/* Serialize a nv50_ir_prog_info structure and save it into blob */
+extern bool
+nv50_ir_prog_info_serialize(struct blob *, struct nv50_ir_prog_info *);
+
 /* Serialize a nv50_ir_prog_info_out structure and save it into blob */
 extern bool
 nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
index 077f3eba6c8..0f47189f10b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
@@ -17,6 +17,87 @@ enum InterpApply {
FLIP_GM107 = 7
 };
 
+extern bool
+nv50_ir_prog_info_serialize(struct blob *blob, struct nv50_ir_prog_info *info)
+{
+   blob_write_uint16(blob, info->target);
+   blob_write_uint8(blob, info->type);
+   blob_write_uint8(blob, info->optLevel);
+   blob_write_uint8(blob, info->dbgFlags);
+   blob_write_uint8(blob, info->omitLineNum);
+   blob_write_uint32(blob, info->bin.smemSize);
+   blob_write_uint16(blob, info->bin.maxOutput);
+   blob_write_uint8(blob, info->bin.sourceRep);
+
+   switch(info->bin.sourceRep) {
+  case PIPE_SHADER_IR_TGSI: {
+ struct tgsi_token *tokens = (struct tgsi_token *)info->bin.source;
+ unsigned int num_tokens = tgsi_num_tokens(tokens);
+
+ blob_write_uint32(blob, num_tokens);
+ blob_write_bytes(blob, tokens, num_tokens * sizeof(struct 
tgsi_token));
+ break;
+  }
+  case PIPE_SHADER_IR_NIR: {
+ struct nir_shader *nir = (struct nir_shader *)info->bin.source;
+ nir_serialize(blob, nir, false);
+ break;
+  }
+  default:
+ assert(!"unhandled info->bin.sourceRep");
+ return false;
+   }
+
+   blob_write_uint16(blob, info->immd.bufSize);
+   blob_write_bytes(blob, info->immd.buf, info->immd.bufSize * 
sizeof(*info->immd.buf));
+   blob_write_uint16(blob, info->immd.count);
+   blob_write_bytes(blob, info->immd.data, info->immd.count * 
sizeof(*info->immd.data));
+   blob_write_bytes(blob, info->immd.type, info->immd.count * 16); // for each 
vec4 (128 bit)
+
+   switch (info->type) {
+  case PIPE_SHADER_VERTEX:
+ blob_write_bytes(blob, info->prop.vp.inputMask,
+  4 * sizeof(*info->prop.vp.inputMask)); /* array of 
size 4 */
+ break;
+  case PIPE_SHADER_TESS_CTRL:
+ blob_write_uint32(blob, info->prop.cp.inputOffset);
+ blob_write_uint32(blob, info->prop.cp.sharedOffset);
+ blob_write_uint32(blob, info->prop.cp.gridInfoBase);
+ blob_write_bytes(blob, info->prop.cp.numThreads,
+  3 * sizeof(*info->prop.cp.numThreads)); /* array of 
size 3 */
+  case PIPE_SHADER_GEOMETRY:
+ blob_write_uint8(blob, info->prop.gp.inputPrim);
+ break;
+  case PIPE_SHADER_FRAGMENT:
+ blob_write_uint8(blob, info->prop.fp.persampleInvocation);
+ break;
+  default:
+ break;
+   }
+
+   blob_write_uint8(blob, info->io.auxCBSlot);
+   blob_write_uint16(blob, info->io.ucpBase);
+   blob_write_uint16(blob, info->io.drawInfoBase);
+   blob_write_uint16(blob, info->io.alphaRefBase);
+   blob_write_uint8(blob, info->io.pointSize);
+   blob_write_uint8(blob, info->io.viewportId);
+   blob_write_bytes(blob, info->io.backFaceColor, 2 * 
sizeof(*info->io.backFaceColor));
+   blob_write_uint8(blob, info->io.mul_zero_wins);
+   blob_write_uint8(blob, info->io.nv50styleSurfaces);
+   blob_write_uint16(blob, info->io.texBindBase);
+   blob_write_uint16(blob, info->io.fbtexBindBase);
+   blob_write_uint16(blob, info->io.suInfoBase);
+   blob_write_uint16(blob, info->io.bindlessBase);
+   blob_write_uint16(blob, info->io.bufInfoBase);
+   blob_write_uint16(blob, info->io.sampleInfoBase);
+   blob_write_uint8(blob, info->io.msInfoCBSlot);
+   blob_write_uint16(blob, info->io.msInfoBase);
+   blob_write_uint16(blob, info->io.uboInfoBase);
+   blob_write_uint8(blob, info->io.genUserClip);
+
+   return true;
+}
+
 extern bool
 nv50_ir_prog_info_out_serialize(struct blob

[Mesa-dev] [PATCH] nv50/ir: Add mul and mod constant optimizations

2019-07-23 Thread Mark Menzynski
Optimizations for 0/n, 1/n and 0%n.
No changes in shader-db tests, because these cases never occur there, but
the optimizations should come in handy.

Signed-off-by: Mark Menzynski 
---
 .../nouveau/codegen/nv50_ir_peephole.cpp  | 30 +--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 0b3220903b9..12069e19808 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1177,10 +1177,28 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
, int s)
   break;
 
case OP_DIV:
-  if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32))
+  if (i->dType != TYPE_S32 && i->dType != TYPE_U32)
  break;
+
   bld.setPosition(i, false);
-  if (imm0.reg.data.u32 == 0) {
+  if (s == 0) {
+ if (imm0.reg.data.u32 == 0) {
+i->op = OP_MOV;
+i->setSrc(1, NULL);
+ }
+ else if (imm0.reg.data.u32 == 1) {
+Value *tA, *tB;
+Instruction *slct;
+
+tA = bld.mkOp1v(OP_ABS, TYPE_U32, bld.getSSA(), i->getSrc(1));
+tB = bld.mkOp2v(OP_ADD, TYPE_S32, bld.getSSA(), tA, 
bld.loadImm(NULL, -1));
+slct = bld.mkCmp(OP_SLCT, CC_GT, i->dType, bld.getSSA(), TYPE_U32, 
bld.loadImm(NULL, 0), i->getSrc(1), tB);
+i->def(0).replace(slct->getDef(0), false);
+ }
+ break;
+  }
+
+  if (s != 1 || imm0.reg.data.u32 == 0) {
  break;
   } else
   if (imm0.reg.data.u32 == 1) {
@@ -1259,6 +1277,14 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
, int s)
   break;
 
case OP_MOD:
+  if (s == 0) {
+ if (imm0.reg.data.u32 == 0) {
+i->op = OP_MOV;
+i->setSrc(1, NULL);
+ }
+ break;
+  }
+
   if (s == 1 && imm0.isPow2()) {
  bld.setPosition(i, false);
  if (i->sType == TYPE_U32) {
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2] gm107/ir: Add stg, ldg instructions and function for checking offset length

2019-07-23 Thread Mark Menzynski
Nvidia actively uses these instructions, so maybe they are better at
something.
A long-offset checking function was made because these instructions only have
24-bit address offsets.

v2: removed long offset function

Signed-off-by: Mark Menzynski 
---
 .../nouveau/codegen/nv50_ir_emit_gm107.cpp| 24 +++
 1 file changed, 24 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 6eefe8f0025..8da5adb94ce 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -174,9 +174,11 @@ private:
void emitLDC();
void emitLDL();
void emitLDS();
+   void emitLDG();
void emitLD();
void emitSTL();
void emitSTS();
+   void emitSTG();
void emitST();
void emitALD();
void emitAST();
@@ -2414,6 +2416,17 @@ CodeEmitterGM107::emitLDS()
emitGPR  (0x00, insn->def(0));
 }
 
+void
+CodeEmitterGM107::emitLDG()
+{
+   emitInsn (0xeed0);
+   emitLDSTs(0x30, insn->dType);
+   emitLDSTc(0x2e);
+   emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8);
+   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
+   emitGPR  (0x00, insn->def(0));
+}
+
 void
 CodeEmitterGM107::emitLD()
 {
@@ -2445,6 +2458,17 @@ CodeEmitterGM107::emitSTS()
emitGPR  (0x00, insn->src(1));
 }
 
+void
+CodeEmitterGM107::emitSTG()
+{
+   emitInsn (0xeed8);
+   emitLDSTs(0x30, insn->dType);
+   emitLDSTc(0x2e);
+   emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8);
+   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
+   emitGPR  (0x00, insn->src(1));
+}
+
 void
 CodeEmitterGM107::emitST()
 {
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gm107/ir: Add stg, ldg instructions and function for checking offset length

2019-07-23 Thread Mark Menzynski
> > @@ -2414,6 +2428,17 @@ CodeEmitterGM107::emitLDS()
> > emitGPR  (0x00, insn->def(0));
> >  }
> >
> > +void
> > +CodeEmitterGM107::emitLDG()
> > +{
> > +   emitInsn (0xeed0);
> > +   emitLDSTs(0x30, insn->dType);
> > +   emitLDSTc(0x2e);
> > +   emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8);
>
> I didn't look, but we don't do something a bit more subtle on the
> other ones, like checking if there's an indirect access in the first
> place? With g[], it almost exclusively will be, but still...

It's done the same way in the original store and load functions.

> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gm107/ir: Add stg, ldg instructions and function for checking offset length

2019-07-19 Thread Mark Menzynski
On Fri, Jul 19, 2019 at 5:04 PM Ilia Mirkin  wrote:
>
> On Fri, Jul 19, 2019 at 10:57 AM Mark Menzynski  wrote:
> >
> > Nvidia actively uses these instructions, maybe they are better in
> > something.
> > Long offset checking function was made because these functions only have 24 
> > bit
> > address offsets.
> >
> > Signed-off-by: Mark Menzynski 
> > ---
> >  .../nouveau/codegen/nv50_ir_emit_gm107.cpp| 36 +++
> >  1 file changed, 36 insertions(+)
> >
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> > index 6eefe8f0025..c01a3017ba9 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> > @@ -87,6 +87,7 @@ private:
> > inline void emitADDR(int, int, int, int, const ValueRef &);
> > inline void emitCBUF(int, int, int, int, int, const ValueRef &);
> > inline bool longIMMD(const ValueRef &);
> > +   inline bool longOffset(const ValueRef &);
> > inline void emitIMMD(int, int, const ValueRef &);
> >
> > void emitCond3(int, CondCode);
> > @@ -174,9 +175,11 @@ private:
> > void emitLDC();
> > void emitLDL();
> > void emitLDS();
> > +   void emitLDG();
> > void emitLD();
> > void emitSTL();
> > void emitSTS();
> > +   void emitSTG();
> > void emitST();
> > void emitALD();
> > void emitAST();
> > @@ -333,6 +336,17 @@ CodeEmitterGM107::longIMMD(const ValueRef )
> > return false;
> >  }
> >
> > +bool
> > +CodeEmitterGM107::longOffset(const ValueRef &ref)
> > +{
> > +   // TODO: check for other files as well?
> > +   if (ref.getFile() != FILE_MEMORY_GLOBAL)
> > +  return false;
>
> I haven't seen the uses (best to send stuff like this as a series),
> but you're saying that if it's not global memory, then it's not a long
> offset? I suspect in the caller it should be more like
>
> assert(file == global || !long offset) or something.
>
This is how I used it for load; store used it the same way. I have
not sent it because we didn't find any noticeable changes with it:

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index c01a3017ba9..f632178138b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -3603,7 +3603,12 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
   case FILE_MEMORY_CONST : emitLDC(); break;
   case FILE_MEMORY_LOCAL : emitLDL(); break;
   case FILE_MEMORY_SHARED: emitLDS(); break;
-  case FILE_MEMORY_GLOBAL: emitLD(); break;
+  case FILE_MEMORY_GLOBAL:
+ if (longOffset(insn->src(0)))
+emitLD();
+ else
+emitLDG();
+ break;
   default:
  assert(!"invalid load");
  emitNOP();
> > +
> > +   int32_t offset = ref.get()->reg.data.offset;
> > +   return offset >  0x7f || offset < -0x80;
>
> You have two spaces after the >. Remove one of them.
>
> > +}
> > +
> >  void
> >  CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef )
> >  {
> > @@ -2414,6 +2428,17 @@ CodeEmitterGM107::emitLDS()
> > emitGPR  (0x00, insn->def(0));
> >  }
> >
> > +void
> > +CodeEmitterGM107::emitLDG()
> > +{
> > +   emitInsn (0xeed0);
> > +   emitLDSTs(0x30, insn->dType);
> > +   emitLDSTc(0x2e);
> > +   emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8);
>
> I didn't look, but we don't do something a bit more subtle on the
> other ones, like checking if there's an indirect access in the first
> place? With g[], it almost exclusively will be, but still...
>
> > +   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
> > +   emitGPR  (0x00, insn->def(0));
> > +}
> > +
> >  void
> >  CodeEmitterGM107::emitLD()
> >  {
> > @@ -2445,6 +2470,17 @@ CodeEmitterGM107::emitSTS()
> > emitGPR  (0x00, insn->src(1));
> >  }
> >
> > +void
> > +CodeEmitterGM107::emitSTG()
> > +{
> > +   emitInsn (0xeed8);
> > +   emitLDSTs(0x30, insn->dType);
> > +   emitLDSTc(0x2e);
> > +   emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8);
> > +   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
> > +   emitGPR  (0x00, insn->src(1));
> > +}
> > +
> >  void
> >  CodeEmitterGM107::emitST()
> >  {
> > --
> > 2.21.0
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] gm107/ir: Add stg, ldg instructions and function for checking offset length

2019-07-19 Thread Mark Menzynski
Nvidia actively uses these instructions, so they may be better in
some respect.
The long-offset checking function was added because these instructions only
have 24-bit address offsets.

Signed-off-by: Mark Menzynski 
---
 .../nouveau/codegen/nv50_ir_emit_gm107.cpp| 36 +++
 1 file changed, 36 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 6eefe8f0025..c01a3017ba9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -87,6 +87,7 @@ private:
inline void emitADDR(int, int, int, int, const ValueRef &);
inline void emitCBUF(int, int, int, int, int, const ValueRef &);
inline bool longIMMD(const ValueRef &);
+   inline bool longOffset(const ValueRef &);
inline void emitIMMD(int, int, const ValueRef &);
 
void emitCond3(int, CondCode);
@@ -174,9 +175,11 @@ private:
void emitLDC();
void emitLDL();
void emitLDS();
+   void emitLDG();
void emitLD();
void emitSTL();
void emitSTS();
+   void emitSTG();
void emitST();
void emitALD();
void emitAST();
@@ -333,6 +336,17 @@ CodeEmitterGM107::longIMMD(const ValueRef )
return false;
 }
 
+bool
+CodeEmitterGM107::longOffset(const ValueRef &ref)
+{
+   // TODO: check for other files as well?
+   if (ref.getFile() != FILE_MEMORY_GLOBAL)
+  return false;
+
+   int32_t offset = ref.get()->reg.data.offset;
+   return offset >  0x7f || offset < -0x80;
+}
+
 void
 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef )
 {
@@ -2414,6 +2428,17 @@ CodeEmitterGM107::emitLDS()
emitGPR  (0x00, insn->def(0));
 }
 
+void
+CodeEmitterGM107::emitLDG()
+{
+   emitInsn (0xeed0);
+   emitLDSTs(0x30, insn->dType);
+   emitLDSTc(0x2e);
+   emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8);
+   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
+   emitGPR  (0x00, insn->def(0));
+}
+
 void
 CodeEmitterGM107::emitLD()
 {
@@ -2445,6 +2470,17 @@ CodeEmitterGM107::emitSTS()
emitGPR  (0x00, insn->src(1));
 }
 
+void
+CodeEmitterGM107::emitSTG()
+{
+   emitInsn (0xeed8);
+   emitLDSTs(0x30, insn->dType);
+   emitLDSTc(0x2e);
+   emitField(0x2d, 1, insn->src(0).getIndirect(0)->getSize() == 8);
+   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
+   emitGPR  (0x00, insn->src(1));
+}
+
 void
 CodeEmitterGM107::emitST()
 {
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nvc0/ir: Fix assert accessing null pointer

2019-07-19 Thread Mark Menzynski
Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=111007
Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=67
Signed-off-by: Mark Menzynski 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index aca3b0afb1e..1f702a987d8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -51,12 +51,12 @@ NVC0LegalizeSSA::handleDIV(Instruction *i)
// Generate movs to the input regs for the call we want to generate
for (int s = 0; i->srcExists(s); ++s) {
   Instruction *ld = i->getSrc(s)->getInsn();
-  assert(ld->getSrc(0) != NULL);
   // check if we are moving an immediate, propagate it in that case
   if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV) ||
 !(ld->src(0).getFile() == FILE_IMMEDIATE))
  bld.mkMovToReg(s, i->getSrc(s));
   else {
+ assert(ld->getSrc(0) != NULL);
  bld.mkMovToReg(s, ld->getSrc(0));
  // Clear the src, to make code elimination possible here before we
  // delete the instruction i later
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev