[Mesa-dev] [PATCH v7 09/35] nvir/nir: run some passes to make the conversion easier

2018-04-16 Thread Karol Herbst
v2: add constant_folding
v6: print non-final NIR only for verbose debugging

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 39 ++
 1 file changed, 39 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index b22c62fd434..0b7a5981f73 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -31,6 +31,12 @@
 #include "codegen/nv50_ir_lowering_helper.h"
 #include "codegen/nv50_ir_util.h"
 
+/* Size callback handed to nir_lower_io: reports how many attribute slots
+ * the given GLSL type occupies.
+ */
+static int
+type_size(const struct glsl_type *type)
+{
+   const int slots = glsl_count_attribute_slots(type, false);
+   return slots;
+}
+
 namespace {
 
 using namespace nv50_ir;
@@ -52,6 +58,39 @@ Converter::Converter(Program *prog, nir_shader *nir, 
nv50_ir_prog_info *info)
 bool
 Converter::run()
 {
+   bool progress;
+
+   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
+  nir_print_shader(nir, stderr);
+
+   NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, 
(nir_lower_io_options)0);
+   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
+   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+   NIR_PASS_V(nir, nir_lower_alu_to_scalar);
+   NIR_PASS_V(nir, nir_lower_phis_to_scalar);
+
+   do {
+  progress = false;
+  /* some ops depend on having constants as sources, but those can also
+   * point to expressions made from constants like 0 + 1
+   */
+  NIR_PASS(progress, nir, nir_opt_constant_folding);
+  NIR_PASS(progress, nir, nir_copy_prop);
+  NIR_PASS(progress, nir, nir_opt_dce);
+  NIR_PASS(progress, nir, nir_opt_dead_cf);
+   } while (progress);
+
+   NIR_PASS_V(nir, nir_lower_locals_to_regs);
+   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_local);
+   NIR_PASS_V(nir, nir_convert_from_ssa, true);
+
+   /* Garbage collect dead instructions */
+   nir_sweep(nir);
+
+   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
+  nir_print_shader(nir, stderr);
+
return false;
 }
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 21/35] nvir/nir: implement load_(interpolated_)input/output

2018-04-16 Thread Karol Herbst
v3: and load_output
v4: use smarter getIndirect helper
use new getSlotAddress helper
v5: don't use const_offset directly
fix for indirects
v6: add support for interpolateAt
v7: fix compiler warnings
add load_barycentric_sample
handle load_output for fragment shaders

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 134 +
 1 file changed, 134 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index b34fe7739d8..740dee5c95a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1523,6 +1523,140 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_interpolated_input:
+   case nir_intrinsic_load_output: {
+  LValues  = convert(>dest);
+
+  /* FBFetch */
+  if (prog->getType() == Program::TYPE_FRAGMENT &&
+  op == nir_intrinsic_load_output) {
+ std::vector<Value*> defs, srcs;
+ uint8_t mask = 0;
+
+ srcs.push_back(getSSA());
+ srcs.push_back(getSSA());
+ Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 
0));
+ Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 
1));
+ mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
+ mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
+
+ srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 
0)));
+ srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), 
mkSysVal(SV_SAMPLE_INDEX, 0)));
+
+ for (auto i = 0u; i < insn->num_components; ++i) {
+defs.push_back(newDefs[i]);
+mask |= 1 << i;
+ }
+
+ TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, 
defs, srcs);
+ texi->tex.levelZero = 1;
+ texi->tex.mask = mask;
+ texi->tex.useOffsets = 0;
+ texi->tex.r = 0x;
+ texi->tex.s = 0x;
+
+ info->prop.fp.readsFramebuffer = true;
+ break;
+  }
+
+  const DataType dType = getDType(insn);
+  Value *indirect;
+  bool input = op != nir_intrinsic_load_output;
+  operation nvirOp;
+  uint32_t mode = 0;
+
+  auto idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input 
? 1 : 0, 0, indirect);
+  nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
+
+  /* see load_barycentric_* handling */
+  if (prog->getType() == Program::TYPE_FRAGMENT) {
+ mode = translateInterpMode(, nvirOp);
+ if (op == nir_intrinsic_load_interpolated_input) {
+ImmediateValue immMode;
+if (getSrc(>src[0], 
1)->getUniqueInsn()->src(0).getImmediate(immMode))
+   mode |= immMode.reg.data.u32;
+ }
+  }
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : 
FILE_SHADER_OUTPUT, 0, dType, address);
+ if (prog->getType() == Program::TYPE_FRAGMENT) {
+int s = 1;
+if (typeSizeof(dType) == 8) {
+   Value *lo = getSSA();
+   Value *hi = getSSA();
+   Instruction *interp;
+
+   interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s, getSrc(>src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+
+   Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : 
FILE_SHADER_OUTPUT, 0, dType, address + 4);
+   interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s++, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s++, getSrc(>src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+
+   mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
+} else {
+   Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s++, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s++, getSrc(>src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+}
+ } else {
+mkLoad(dType, n

[Mesa-dev] [PATCH v7 14/35] nvir/nir: parse NIR shader info

2018-04-16 Thread Karol Herbst
v2: parse a few more fields
v3: add special handling for GL_ISOLINES

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 60 ++
 1 file changed, 60 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 00ca1ae1512..4bb99c6635c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -96,6 +96,7 @@ private:
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
bool assignSlots();
+   bool parseNIR();
 
nir_shader *nir;
 
@@ -996,6 +997,60 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile 
file, operation op,
}
 }
 
+/* Copies the shader-wide properties stored in nir->info into the nv50_ir
+ * program info. Clip/cull distance counts are always copied; everything else
+ * is selected by the program type. Always returns true.
+ */
+bool
+Converter::parseNIR()
+{
+   const auto sysvalsRead = nir->info.system_values_read;
+
+   info->io.clipDistances = nir->info.clip_distance_array_size;
+   info->io.cullDistances = nir->info.cull_distance_array_size;
+
+   switch(prog->getType()) {
+   case Program::TYPE_COMPUTE:
+      for (int dim = 0; dim < 3; ++dim)
+         info->prop.cp.numThreads[dim] = nir->info.cs.local_size[dim];
+      info->bin.smemSize = nir->info.cs.shared_size;
+      break;
+   case Program::TYPE_FRAGMENT:
+      info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
+      /* reading either the sample id or the sample position sets this flag */
+      info->prop.fp.persampleInvocation =
+         (sysvalsRead & (SYSTEM_BIT_SAMPLE_ID | SYSTEM_BIT_SAMPLE_POS)) != 0;
+      info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
+      info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
+      info->prop.fp.usesSampleMaskIn =
+         (sysvalsRead & SYSTEM_BIT_SAMPLE_MASK_IN) != 0;
+      break;
+   case Program::TYPE_GEOMETRY:
+      info->prop.gp.inputPrim = nir->info.gs.input_primitive;
+      info->prop.gp.instanceCount = nir->info.gs.invocations;
+      info->prop.gp.maxVertices = nir->info.gs.vertices_out;
+      info->prop.gp.outputPrim = nir->info.gs.output_primitive;
+      break;
+   case Program::TYPE_TESSELLATION_CONTROL:
+   case Program::TYPE_TESSELLATION_EVAL: {
+      const auto primMode = nir->info.tess.primitive_mode;
+
+      /* GL_ISOLINES is special-cased and stored as GL_LINES */
+      info->prop.tp.domain = (primMode == GL_ISOLINES) ? GL_LINES : primMode;
+      info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
+      info->prop.tp.outputPrim =
+         nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
+      info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
+      info->prop.tp.winding = !nir->info.tess.ccw;
+      break;
+   }
+   case Program::TYPE_VERTEX:
+      /* any of base vertex, base instance or draw id counts */
+      info->prop.vp.usesDrawParameters =
+         (sysvalsRead & (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
+                         BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
+                         BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))) != 0;
+      break;
+   default:
+      break;
+   }
+
+   return true;
+}
+
 bool
 Converter::run()
 {
@@ -1029,6 +1084,11 @@ Converter::run()
/* Garbage collect dead instructions */
nir_sweep(nir);
 
+   if (!parseNIR()) {
+  ERROR("Couldn't prase NIR!\n");
+  return false;
+   }
+
if (!assignSlots()) {
   ERROR("Couldn't assign slots!\n");
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 12/35] nvir/nir: run assignSlots

2018-04-16 Thread Karol Herbst
v2: add support for geometry shaders
set idx
add some missing mappings
fix for 64bit inputs/outputs
fix up some FP color output index mix-up
parse centroid flag
v3: fix arrays in outputs as well
fix input/output size calculation for tessellation shaders
v4: add getSlotAddress helper
fix for 64 bit typed inputs
v5: change getSlotAddress interface for easier use
fix sample inputs
fix slot counting for mat
v7: fix driver_location of images

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 626 +
 1 file changed, 626 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 89c55a08ef8..1d1c4526d2b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -69,6 +69,13 @@ private:
uint32_t getIndirect(nir_src *, uint8_t, Value*&);
uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&);
 
+   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
+
+   void setInterpolate(nv50_ir_varying *,
+   decltype(nir_variable().data.interpolation),
+   bool centroid,
+   unsigned semantics);
+
bool isFloatType(nir_alu_type);
bool isSignedType(nir_alu_type);
bool isResultFloat(nir_op);
@@ -81,6 +88,8 @@ private:
std::vector getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   bool assignSlots();
+
nir_shader *nir;
 
NirDefMap ssaDefs;
@@ -303,6 +312,618 @@ Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t 
s, uint8_t c, Value *&
return idx;
 }
 
+/* Translates a VERT_ATTRIB_* slot into a TGSI semantic name/index pair.
+ *
+ * slot:  vertex attribute slot to translate
+ * name:  receives the TGSI_SEMANTIC_* value
+ * index: receives the semantic index
+ *
+ * Slots without a mapping (VERT_ATTRIB_POINT_SIZE included) are reported via
+ * ERROR, hit assert(false), and leave *name and *index unwritten.
+ */
+static void
+vert_attrib_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
+{
+   auto emit = [name, index](unsigned semantic, unsigned semanticIndex) {
+      *name = semantic;
+      *index = semanticIndex;
+   };
+
+   if (slot >= VERT_ATTRIB_GENERIC0) {
+      emit(TGSI_SEMANTIC_GENERIC, slot - VERT_ATTRIB_GENERIC0);
+   } else if (slot == VERT_ATTRIB_POINT_SIZE) {
+      /* must be rejected before the texcoord range check below */
+      ERROR("unknown vert attrib slot %u\n", slot);
+      assert(false);
+   } else if (slot >= VERT_ATTRIB_TEX0) {
+      emit(TGSI_SEMANTIC_TEXCOORD, slot - VERT_ATTRIB_TEX0);
+   } else {
+      switch (slot) {
+      case VERT_ATTRIB_POS:
+         emit(TGSI_SEMANTIC_POSITION, 0);
+         break;
+      case VERT_ATTRIB_NORMAL:
+         emit(TGSI_SEMANTIC_NORMAL, 0);
+         break;
+      case VERT_ATTRIB_COLOR0:
+         emit(TGSI_SEMANTIC_COLOR, 0);
+         break;
+      case VERT_ATTRIB_COLOR1:
+         emit(TGSI_SEMANTIC_COLOR, 1);
+         break;
+      case VERT_ATTRIB_FOG:
+         emit(TGSI_SEMANTIC_FOG, 0);
+         break;
+      case VERT_ATTRIB_EDGEFLAG:
+         emit(TGSI_SEMANTIC_EDGEFLAG, 0);
+         break;
+      default:
+         ERROR("unknown vert attrib slot %u\n", slot);
+         assert(false);
+         break;
+      }
+   }
+}
+
+static void
+varying_slot_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
+{
+   if (slot >= VARYING_SLOT_PATCH0) {
+  *name = TGSI_SEMANTIC_PATCH;
+  *index = slot - VARYING_SLOT_PATCH0;
+  return;
+   }
+
+   if (slot >= VARYING_SLOT_VAR0) {
+  *name = TGSI_SEMANTIC_GENERIC;
+  *index = slot - VARYING_SLOT_VAR0;
+  return;
+   }
+
+   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
+  *name = TGSI_SEMANTIC_TEXCOORD;
+  *index = slot - VARYING_SLOT_TEX0;
+  return;
+   }
+
+   switch (slot) {
+   case VARYING_SLOT_BFC0:
+  *name = TGSI_SEMANTIC_BCOLOR;
+  *index = 0;
+  break;
+   case VARYING_SLOT_BFC1:
+  *name = TGSI_SEMANTIC_BCOLOR;
+  *index = 1;
+  break;
+   case VARYING_SLOT_CLIP_DIST0:
+  *name = TGSI_SEMANTIC_CLIPDIST;
+  *index = 0;
+  break;
+   case VARYING_SLOT_CLIP_DIST1:
+  *name = TGSI_SEMANTIC_CLIPDIST;
+  *index = 1;
+  break;
+   case VARYING_SLOT_CLIP_VERTEX:
+  *name = TGSI_SEMANTIC_CLIPVERTEX;
+  *index = 0;
+  break;
+   case VARYING_SLOT_COL0:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 0;
+  break;
+   case VARYING_SLOT_COL1:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 1;
+  break;
+   case VARYING_SLOT_EDGE:
+  *name = TGSI_SEMANTIC_EDGEFLAG;
+  *index = 0;
+  break;
+   case VARYING_SLOT_FACE:
+  *name = TGSI_SEMANTIC_FACE;
+  *index = 0;
+  break;
+   case VARYING_SLOT_FOGC:
+  *name = TGSI_SEMANTIC_FOG;
+  *index = 0;
+  break;
+   case VARYING_SLOT_LAYER:
+  *name = TGSI_SEMANTIC_LAYER;
+  *index = 0;
+  break;
+   case VARYING_SLOT_PNTC:
+  *name = TGSI_SEMANTIC_PCOORD;
+  *index = 0;
+  break;
+   case VARYING_SLOT_POS:
+  *name = TGSI_SEMANTIC_POSITION;
+  *index = 0;
+  break;
+  

[Mesa-dev] [PATCH v7 15/35] nvir/nir: implement CFG handling

2018-04-16 Thread Karol Herbst
v6: fix loops with blocks at the end nothing points to
skip blocks with no instructions and no predecessors

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 270 -
 1 file changed, 268 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 4bb99c6635c..c2512b01d5a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -55,8 +55,10 @@ private:
typedef decltype(nir_ssa_def().index) NirSSADefIdx;
typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize;
typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap;
+   typedef std::unordered_map<decltype(nir_block().index), BasicBlock*> 
NirBlockMap;
 
LValues& convert(nir_alu_dest *);
+   BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
@@ -98,15 +100,46 @@ private:
bool assignSlots();
bool parseNIR();
 
+   bool visit(nir_block *);
+   bool visit(nir_cf_node *);
+   bool visit(nir_function *);
+   bool visit(nir_if *);
+   bool visit(nir_instr *);
+   bool visit(nir_jump_instr *);
+   bool visit(nir_loop *);
+
nir_shader *nir;
 
NirDefMap ssaDefs;
NirDefMap regDefs;
+   NirBlockMap blocks;
+   unsigned int curLoopDepth;
+
+   BasicBlock *exit;
+
+   union {
+  struct {
+ Value *position;
+  } fp;
+   };
 };
 
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
- nir(nir) {}
+ nir(nir),
+ curLoopDepth(0) {}
+
+/* Returns the BasicBlock that belongs to a NIR block. The mapping is keyed by
+ * block->index; a block seen for the first time gets a new BasicBlock in
+ * 'func', which is remembered for later lookups.
+ */
+BasicBlock *
+Converter::convert(nir_block *block)
+{
+   const auto key = block->index;
+   auto known = blocks.find(key);
+
+   if (known == blocks.end()) {
+      BasicBlock *created = new BasicBlock(func);
+      blocks[key] = created;
+      return created;
+   }
+   return known->second;
+}
 
 bool
 Converter::isFloatType(nir_alu_type type)
@@ -1051,6 +1084,234 @@ Converter::parseNIR()
return true;
 }
 
+/* Emits the code for a NIR function; only "main" is handled.
+ *
+ * Creates the entry and exit blocks of prog->main, registers the entry block
+ * for the function's start block, emits the per-stage prologue, visits every
+ * control-flow node of the body and finally terminates the exit block with
+ * OP_EXIT.
+ *
+ * Returns false as soon as visiting one of the body's nodes fails.
+ */
+bool
+Converter::visit(nir_function *function)
+{
+   /* we only support emitting the main function for now */
+   assert(!strcmp(function->name, "main"));
+   assert(function->impl);
+
+   /* usually the blocks will set everything up, but main is special */
+   BasicBlock *entry = new BasicBlock(prog->main);
+   exit = new BasicBlock(prog->main);
+   blocks[nir_start_block(function->impl)->index] = entry;
+   prog->main->setEntry(entry);
+   prog->main->setExit(exit);
+
+   setPosition(entry, true);
+
+   switch (prog->getType()) {
+   case Program::TYPE_TESSELLATION_CONTROL:
+      /* outBase = lane id - invocation id */
+      outBase = mkOp2v(
+         OP_SUB, TYPE_U32, getSSA(),
+         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
+         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
+      break;
+   case Program::TYPE_FRAGMENT: {
+      /* fp.position holds the reciprocal of the 4th position component */
+      Symbol *sv = mkSysVal(SV_POSITION, 3);
+      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
+      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+      break;
+   }
+   default:
+      break;
+   }
+
+   nir_index_ssa_defs(function->impl);
+   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
+      if (!visit(node))
+         return false;
+   }
+
+   /* link whatever block we ended up in to the exit block */
+   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
+   setPosition(exit, true);
+
+   /* TODO: for non main function this needs to be a OP_RETURN */
+   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
+   return true;
+}
+
+/* Forwards a generic control-flow node to the visitor of its concrete kind
+ * (block, if or loop) and returns that visitor's result. An unknown node type
+ * is reported through ERROR and yields false.
+ */
+bool
+Converter::visit(nir_cf_node *node)
+{
+   switch (node->type) {
+   case nir_cf_node_block:
+      return visit(nir_cf_node_as_block(node));
+   case nir_cf_node_if:
+      return visit(nir_cf_node_as_if(node));
+   case nir_cf_node_loop:
+      return visit(nir_cf_node_as_loop(node));
+   default:
+      ERROR("unknown nir_cf_node type %u\n", node->type);
+      return false;
+   }
+}
+
+/* Emits all instructions of a NIR block into its BasicBlock.
+ *
+ * A block that has neither predecessors nor instructions is skipped and
+ * counts as success. Returns false when visiting an instruction fails.
+ */
+bool
+Converter::visit(nir_block *block)
+{
+   const bool noPredecessors = block->predecessors->entries == 0;
+   if (noPredecessors && block->instr_list.is_empty())
+      return true;
+
+   /* all following instructions are appended to this block's BasicBlock */
+   setPosition(convert(block), true);
+
+   bool ok = true;
+   nir_foreach_instr(insn, block) {
+      ok = visit(insn);
+      if (!ok)
+         break;
+   }
+   return ok;
+}
+
+bool
+Converter::visit(nir_if *nif)
+{
+   DataType sType = getSType(nif->condition, false, false);
+   Value *src = getSrc(>condition, 0);
+
+   nir_block *lastThen = nir_if_last_then_block(nif);
+   nir_block *lastElse = nir_if_last_else_block(nif);
+
+   assert(!lastThen->successors[1]);
+   assert(!lastElse->successors[1]);
+
+   BasicBlock *ifBB = convert(nir_if_first_then_block(ni

[Mesa-dev] [PATCH v7 16/35] nvir/nir: implement nir_load_const_instr

2018-04-16 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index c2512b01d5a..f4f844021a2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -106,6 +106,7 @@ private:
bool visit(nir_if *);
bool visit(nir_instr *);
bool visit(nir_jump_instr *);
+   bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
 
nir_shader *nir;
@@ -1278,6 +1279,8 @@ Converter::visit(nir_instr *insn)
switch (insn->type) {
case nir_instr_type_jump:
   return visit(nir_instr_as_jump(insn));
+   case nir_instr_type_load_const:
+  return visit(nir_instr_as_load_const(insn));
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -1312,6 +1315,21 @@ Converter::visit(nir_jump_instr *insn)
return true;
 }
 
+/* Materializes a NIR constant: every component of the definition is loaded as
+ * an immediate into the value created for it. Bit sizes above 32 take the
+ * 64 bit constant, everything else the 32 bit one. Always returns true.
+ */
+bool
+Converter::visit(nir_load_const_instr *insn)
+{
+   assert(insn->def.bit_size <= 64);
+
+   LValues &newDefs = convert(&insn->def);
+   for (int i = 0; i < insn->def.num_components; i++) {
+      if (insn->def.bit_size > 32)
+         loadImm(newDefs[i], insn->value.u64[i]);
+      else
+         loadImm(newDefs[i], insn->value.u32[i]);
+   }
+   return true;
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 13/35] nvir/nir: add loadFrom and storeTo helper

2018-04-16 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 72 ++
 1 file changed, 72 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 1d1c4526d2b..00ca1ae1512 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -76,6 +76,13 @@ private:
bool centroid,
unsigned semantics);
 
+   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t 
base,
+ uint8_t c, Value *indirect0 = nullptr,
+ Value *indirect1 = nullptr, bool patch = false);
+   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
+Value *src, uint8_t idx, uint8_t c, Value *indirect0 = nullptr,
+Value *indirect1 = nullptr);
+
bool isFloatType(nir_alu_type);
bool isSignedType(nir_alu_type);
bool isResultFloat(nir_op);
@@ -924,6 +931,71 @@ Converter::getSlotAddress(nir_intrinsic_instr *insn, 
uint8_t idx, uint8_t slot)
return vary[idx].slot[slot] * 4;
 }
 
+/* Emits a load of component 'c' of type 'ty' from 'file' (index 'i') at
+ * base + c * sizeof(ty) into 'def'.
+ *
+ * 8 byte types read from const or buffer memory, or through indirect0, are
+ * fetched as two 32 bit loads and glued together with OP_MERGE; every other
+ * case is a single load. indirect1 is set as the second indirect and 'patch'
+ * as the perPatch flag on each load.
+ *
+ * Returns the instruction that defines 'def' (the merge or the load).
+ */
+Instruction *
+Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
+                    uint32_t base, uint8_t c, Value *indirect0,
+                    Value *indirect1, bool patch)
+{
+   const auto tySize = typeSizeof(ty);
+   const uint32_t offset = base + c * tySize;
+   const bool needsSplit =
+      tySize == 8 &&
+      (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0);
+
+   if (!needsSplit) {
+      Instruction *ld = mkLoad(ty, def, mkSymbol(file, i, ty, offset), indirect0);
+      ld->setIndirect(0, 1, indirect1);
+      ld->perPatch = patch;
+      return ld;
+   }
+
+   /* one 32 bit load per half, low word first */
+   auto loadHalf = [&](Value *dst, uint32_t addr) {
+      Instruction *half =
+         mkLoad(TYPE_U32, dst, mkSymbol(file, i, TYPE_U32, addr), indirect0);
+      half->setIndirect(0, 1, indirect1);
+      half->perPatch = patch;
+   };
+
+   Value *lo = getSSA();
+   Value *hi = getSSA();
+   loadHalf(lo, offset);
+   loadHalf(hi, offset + 4);
+
+   return mkOp2(OP_MERGE, ty, def, lo, hi);
+}
+
+/* Emits the store of 'src' to component 'c' of slot 'idx' in 'file', at the
+ * address given by getSlotAddress().
+ *
+ * An 8 byte value stored through indirect0 is split and written as two
+ * 32 bit stores; otherwise a single store is emitted. For OP_EXPORT the
+ * stored value(s) are first copied with a mov. Every store takes its perPatch
+ * flag from info->out[idx].patch.
+ *
+ * NOTE(review): indirect1 is accepted but not used in this function.
+ */
+void
+Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
+                   DataType ty, Value *src, uint8_t idx, uint8_t c,
+                   Value *indirect0, Value *indirect1)
+{
+   const uint8_t size = typeSizeof(ty);
+   const uint32_t address = getSlotAddress(insn, idx, c);
+   const bool isExport = op == OP_EXPORT;
+
+   if (size != 8 || !indirect0) {
+      if (isExport)
+         src = mkMov(getSSA(size), src, ty)->getDef(0);
+      mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
+              src)->perPatch = info->out[idx].patch;
+      return;
+   }
+
+   Value *split[2];
+   mkSplit(split, 4, src);
+
+   if (isExport) {
+      for (int half = 0; half < 2; ++half)
+         split[half] = mkMov(getSSA(), split[half], ty)->getDef(0);
+   }
+
+   /* low word at 'address', high word 4 bytes further */
+   for (int half = 0; half < 2; ++half) {
+      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4 * half),
+              indirect0, split[half])->perPatch = info->out[idx].patch;
+   }
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 10/35] nvir/nir: track defs and provide easy access functions

2018-04-16 Thread Karol Herbst
v2: add helper function for indirects
v4: add new getIndirect overload for easier use
v5: use getSSA for ssa values
we can just create the values for unassigned registers in getSrc
v6: always create at least 32 bit values

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 132 +
 1 file changed, 132 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 0b7a5981f73..b61c6e90b1a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -31,6 +31,9 @@
 #include "codegen/nv50_ir_lowering_helper.h"
 #include "codegen/nv50_ir_util.h"
 
+#include 
+#include 
+
 static int
 type_size(const struct glsl_type *type)
 {
@@ -48,13 +51,142 @@ public:
 
bool run();
 private:
+   typedef std::vector<LValue*> LValues;
+   typedef decltype(nir_ssa_def().index) NirSSADefIdx;
+   typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap;
+
+   LValues& convert(nir_alu_dest *);
+   LValues& convert(nir_dest *);
+   LValues& convert(nir_register *);
+   LValues& convert(nir_ssa_def *);
+
+   Value* getSrc(nir_alu_src *, uint8_t component = 0);
+   Value* getSrc(nir_register *, uint8_t);
+   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
+   Value* getSrc(nir_ssa_def *, uint8_t);
+
+   uint32_t getIndirect(nir_src *, uint8_t, Value*&);
+   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&);
+
nir_shader *nir;
+
+   NirDefMap ssaDefs;
+   NirDefMap regDefs;
 };
 
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir) {}
 
+/* Returns the values backing a NIR destination: the SSA definition's values
+ * for SSA destinations, the register's values otherwise. Indirect register
+ * destinations are not supported; they are reported through ERROR and hit
+ * assert(false).
+ */
+Converter::LValues&
+Converter::convert(nir_dest *dest)
+{
+   if (dest->is_ssa)
+      return convert(&dest->ssa);
+   if (dest->reg.indirect) {
+      ERROR("no support for indirects.");
+      assert(false);
+   }
+   return convert(dest->reg.reg);
+}
+
+/* Returns the per-component values of a NIR register, keyed by reg->index.
+ * On first use one scratch value per component is created, sized to the
+ * register's bit size in bytes but never less than 4.
+ */
+Converter::LValues&
+Converter::convert(nir_register *reg)
+{
+   const auto key = reg->index;
+   auto cached = regDefs.find(key);
+   if (cached != regDefs.end())
+      return cached->second;
+
+   LValues fresh(reg->num_components);
+   for (LValue *&component : fresh)
+      component = getScratch(std::max(4, reg->bit_size / 8));
+   return regDefs[key] = fresh;
+}
+
+/* Returns the per-component values of a NIR SSA definition, keyed by
+ * def->index. On first use one SSA value per component is created, sized to
+ * the definition's bit size in bytes but never less than 4.
+ */
+Converter::LValues&
+Converter::convert(nir_ssa_def *def)
+{
+   const auto key = def->index;
+   auto cached = ssaDefs.find(key);
+   if (cached != ssaDefs.end())
+      return cached->second;
+
+   LValues fresh;
+   fresh.reserve(def->num_components);
+   for (int comp = 0; comp < def->num_components; ++comp)
+      fresh.push_back(getSSA(std::max(4, def->bit_size / 8)));
+   return ssaDefs[key] = fresh;
+}
+
+/* Returns the value read by component 'component' of an ALU source, after
+ * applying the source's swizzle. The abs/negate modifiers are not supported;
+ * they are reported through ERROR and hit assert(false).
+ */
+Value*
+Converter::getSrc(nir_alu_src *src, uint8_t component)
+{
+   if (src->abs || src->negate) {
+      ERROR("modifiers currently not supported on nir_alu_src\n");
+      assert(false);
+   }
+   return getSrc(&src->src, src->swizzle[component]);
+}
+
+/* Returns component 'idx' of a NIR register. A register without an entry in
+ * regDefs gets its values created through convert() first.
+ */
+Value*
+Converter::getSrc(nir_register *reg, uint8_t idx)
+{
+   auto known = regDefs.find(reg->index);
+   LValues &components = (known != regDefs.end()) ? known->second : convert(reg);
+   return components[idx];
+}
+
+/* Returns component 'idx' of a generic NIR source.
+ *
+ * SSA sources and directly addressed registers are looked up as they are.
+ * For an indirectly addressed register the indirect source itself is
+ * returned, but only when 'indirect' is set; otherwise the case is reported
+ * through ERROR, hits assert(false) and yields nullptr.
+ */
+Value*
+Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
+{
+   if (src->is_ssa)
+      return getSrc(src->ssa, idx);
+
+   if (!src->reg.indirect)
+      return getSrc(src->reg.reg, idx);
+
+   if (indirect)
+      return getSrc(src->reg.indirect, idx);
+
+   ERROR("no support for indirects.");
+   assert(false);
+   return nullptr;
+}
+
+/* Returns component 'idx' of an SSA definition that was converted before.
+ * A definition missing from ssaDefs is reported through ERROR, hits
+ * assert(false) and yields nullptr.
+ */
+Value*
+Converter::getSrc(nir_ssa_def *src, uint8_t idx)
+{
+   auto known = ssaDefs.find(src->index);
+   if (known != ssaDefs.end())
+      return known->second[idx];
+
+   ERROR("SSA value %u not found\n", src->index);
+   assert(false);
+   return nullptr;
+}
+
+/* Splits a source into a constant offset and an indirect value.
+ *
+ * src:      source to inspect
+ * idx:      component to fetch when the source is not constant
+ * indirect: out parameter; nullptr for a constant source, otherwise the
+ *           source's value
+ *
+ * Returns the constant (its first 32 bit component), or 0 when the source is
+ * not constant.
+ */
+uint32_t
+Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
+{
+   nir_const_value *offset = nir_src_as_const_value(*src);
+
+   if (offset) {
+      indirect = nullptr;
+      return offset->u32[0];
+   }
+
+   indirect = getSrc(src, idx, true);
+   return 0;
+}
+
+/* Computes the index addressed by source 's' of an intrinsic.
+ *
+ * insn:     intrinsic whose base and source are read
+ * s:        number of the source that carries the offset
+ * c:        component of that source to use
+ * indirect: out parameter; nullptr for a constant offset, otherwise the
+ *           offset value shifted left by 4 into a FILE_ADDRESS value
+ *
+ * Returns nir_intrinsic_base(insn) plus the constant part of the offset.
+ */
+uint32_t
+Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
+{
+   auto idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
+   if (indirect)
+      indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(nullptr, 4));
+   return idx;
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 08/35] nouveau: fix nir and TGSI shader cache collision

2018-04-16 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/gallium/drivers/nouveau/nouveau_screen.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c 
b/src/gallium/drivers/nouveau/nouveau_screen.c
index 2598c78a45b..655d2d090f6 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -156,9 +156,13 @@ nouveau_disk_cache_create(struct nouveau_screen *screen)
  _timestamp)) {
   res = asprintf(_str, "%u", mesa_timestamp);
   if (res != -1) {
+ uint64_t shader_debug_flags = 0;
+ if (screen->prefer_nir)
+shader_debug_flags |= 1 << 0;
+
  screen->disk_shader_cache =
 disk_cache_create(nouveau_screen_get_name(>base),
-  timestamp_str, 0);
+  timestamp_str, shader_debug_flags);
  free(timestamp_str);
   }
}
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 05/35] nvir: add lowering helper

2018-04-16 Thread Karol Herbst
this is mostly useful for lazy IR converters not wanting to deal with 64 bit
lowering and other illegal stuff

v5: also handle SAT
v6: rename type variables
fixed lowering of NEG
add lowering of NOT

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/gallium/drivers/nouveau/Makefile.sources   |   2 +
 .../nouveau/codegen/nv50_ir_lowering_helper.cpp| 275 +
 .../nouveau/codegen/nv50_ir_lowering_helper.h  |  53 
 src/gallium/drivers/nouveau/meson.build|   2 +
 4 files changed, 332 insertions(+)
 create mode 100644 
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp
 create mode 100644 
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index fee5e59522e..ec344c63169 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -122,6 +122,8 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_graph.h \
codegen/nv50_ir.h \
codegen/nv50_ir_inlines.h \
+   codegen/nv50_ir_lowering_helper.cpp \
+   codegen/nv50_ir_lowering_helper.h \
codegen/nv50_ir_lowering_nv50.cpp \
codegen/nv50_ir_peephole.cpp \
codegen/nv50_ir_print.cpp \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp
new file mode 100644
index 000..9373531b0b1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp
@@ -0,0 +1,275 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Karol Herbst <kher...@redhat.com>
+ */
+
+#include "codegen/nv50_ir_lowering_helper.h"
+
+namespace nv50_ir {
+
+bool
+LoweringHelper::visit(Instruction *insn)
+{
+   switch (insn->op) {
+   case OP_ABS:
+  return handleABS(insn);
+   case OP_CVT:
+  return handleCVT(insn);
+   case OP_MAX:
+   case OP_MIN:
+  return handleMAXMIN(insn);
+   case OP_MOV:
+  return handleMOV(insn);
+   case OP_NEG:
+  return handleNEG(insn);
+   case OP_SAT:
+  return handleSAT(insn);
+   case OP_SLCT:
+  return handleSLCT(insn->asCmp());
+   case OP_AND:
+   case OP_NOT:
+   case OP_OR:
+   case OP_XOR:
+  return handleLogOp(insn);
+   default:
+  return true;
+   }
+}
+
+bool
+LoweringHelper::handleABS(Instruction *insn)
+{
+   DataType dTy = insn->dType;
+   if (!(dTy == TYPE_U64 || dTy == TYPE_S64))
+  return true;
+
+   bld.setPosition(insn, false);
+
+   Value *neg = bld.getSSA(8);
+   Value *negComp[2], *srcComp[2];
+   Value *lo = bld.getSSA(), *hi = bld.getSSA();
+   bld.mkOp2(OP_SUB, dTy, neg, bld.mkImm((uint64_t)0), insn->getSrc(0));
+   bld.mkSplit(negComp, 4, neg);
+   bld.mkSplit(srcComp, 4, insn->getSrc(0));
+   bld.mkCmp(OP_SLCT, CC_LT, TYPE_S32, lo, TYPE_S32, negComp[0], srcComp[0], 
srcComp[1]);
+   bld.mkCmp(OP_SLCT, CC_LT, TYPE_S32, hi, TYPE_S32, negComp[1], srcComp[1], 
srcComp[1]);
+   insn->op = OP_MERGE;
+   insn->setSrc(0, lo);
+   insn->setSrc(1, hi);
+
+   return true;
+}
+
+bool
+LoweringHelper::handleCVT(Instruction *insn)
+{
+   DataType dTy = insn->dType;
+   DataType sTy = insn->sType;
+
+   if (typeSizeof(dTy) <= 4 && typeSizeof(sTy) <= 4)
+  return true;
+
+   bld.setPosition(insn, false);
+
+   if ((dTy == TYPE_S32 && sTy == TYPE_S64) ||
+   (dTy == TYPE_U32 && sTy == TYPE_U64)) {
+  Value *src[2];
+  bld.mkSplit(src, 4, insn->getSrc(0));
+  insn->op = OP_MOV;
+  insn->setSrc(0, src[0]);
+   } else if (dTy == TYPE_S64 && sTy == TYPE_S32) {
+  Value *tmp = bld.getSSA();
+  bld.mkOp2(OP_SHR, TYPE_S32, tmp, insn->getSrc(0), 
bld.loadImm(bld.getSSA(), 31));
+  insn-&g

[Mesa-dev] [PATCH v7 11/35] nvir/nir: add nir type helper functions

2018-04-16 Thread Karol Herbst
v4: treat imul as unsigned
v5: remove pointless !!
v7: inot is unsigned as well

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 116 +
 1 file changed, 116 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index b61c6e90b1a..89c55a08ef8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -53,6 +53,7 @@ public:
 private:
typedef std::vector<LValue*> LValues;
typedef decltype(nir_ssa_def().index) NirSSADefIdx;
+   typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize;
typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap;
 
LValues& convert(nir_alu_dest *);
@@ -68,6 +69,18 @@ private:
uint32_t getIndirect(nir_src *, uint8_t, Value*&);
uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&);
 
+   bool isFloatType(nir_alu_type);
+   bool isSignedType(nir_alu_type);
+   bool isResultFloat(nir_op);
+   bool isResultSigned(nir_op);
+
+   DataType getDType(nir_alu_instr*);
+   DataType getDType(nir_intrinsic_instr*);
+   DataType getDType(nir_op, NirSSADefBitSize);
+
+   std::vector<DataType> getSTypes(nir_alu_instr*);
+   DataType getSType(nir_src&, bool isFloat, bool isSigned);
+
nir_shader *nir;
 
NirDefMap ssaDefs;
@@ -78,6 +91,109 @@ Converter::Converter(Program *prog, nir_shader *nir, 
nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir) {}
 
+bool
+Converter::isFloatType(nir_alu_type type)
+{
+   return nir_alu_type_get_base_type(type) == nir_type_float;
+}
+
+bool
+Converter::isSignedType(nir_alu_type type)
+{
+   return nir_alu_type_get_base_type(type) == nir_type_int;
+}
+
+bool
+Converter::isResultFloat(nir_op op)
+{
+   const nir_op_info &info = nir_op_infos[op];
+   if (info.output_type != nir_type_invalid)
+  return isFloatType(info.output_type);
+
+   ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
+   assert(false);
+   return true;
+}
+
+bool
+Converter::isResultSigned(nir_op op)
+{
+   switch (op) {
+   /* there is no umul and we get wrong results if we treat all muls as 
signed */
+   case nir_op_imul:
+   case nir_op_inot:
+  return false;
+   default:
+  const nir_op_info &info = nir_op_infos[op];
+  if (info.output_type != nir_type_invalid)
+ return isSignedType(info.output_type);
+  ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
+  assert(false);
+  return true;
+   }
+}
+
+DataType
+Converter::getDType(nir_alu_instr *insn)
+{
+   if (insn->dest.dest.is_ssa)
+  return getDType(insn->op, insn->dest.dest.ssa.bit_size);
+   else
+  return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
+}
+
+DataType
+Converter::getDType(nir_intrinsic_instr *insn)
+{
+   if (insn->dest.is_ssa)
+  return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
+   else
+  return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
+}
+
+DataType
+Converter::getDType(nir_op op, Converter::NirSSADefBitSize bitSize)
+{
+   DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), 
isResultSigned(op));
+   if (ty == TYPE_NONE) {
+  ERROR("couldn't get Type for op %s with bitSize %u\n", 
nir_op_infos[op].name, bitSize);
+  assert(false);
+   }
+   return ty;
+}
+
+std::vector<DataType>
+Converter::getSTypes(nir_alu_instr *insn)
+{
+   const nir_op_info &info = nir_op_infos[insn->op];
+   std::vector<DataType> res(info.num_inputs);
+
+   for (auto i = 0u; i < info.num_inputs; ++i) {
+  if (info.input_types[i] != nir_type_invalid) {
+ res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), 
isSignedType(info.input_types[i]));
+  } else {
+ ERROR("getSType not implemented for %s idx %u\n", info.name, i);
+ assert(false);
+ res[i] = TYPE_NONE;
+ break;
+  }
+   }
+
+   return res;
+}
+
+DataType
+Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
+{
+   NirSSADefBitSize bitSize;
+   if (src.is_ssa)
+  bitSize = src.ssa->bit_size;
+   else
+  bitSize = src.reg.reg->bit_size;
+
+   return typeOfSize(bitSize / 8, isFloat, isSigned);
+}
+
 Converter::LValues&
 Converter::convert(nir_dest *dest)
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 03/35] nvir: print the shader type when dumping headers

2018-04-16 Thread Karol Herbst
this makes debugging the shader header a little easier

Signed-off-by: Karol Herbst <kher...@redhat.com>
Acked-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 9520d984bb3..3a11534df83 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -551,6 +551,7 @@ nvc0_program_dump(struct nvc0_program *prog)
unsigned pos;
 
if (prog->type != PIPE_SHADER_COMPUTE) {
+  debug_printf("dumping HDR for type %i\n", prog->type);
   for (pos = 0; pos < ARRAY_SIZE(prog->hdr); ++pos)
  debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n",
   pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 04/35] nvir: move common converter code in base class

2018-04-16 Thread Karol Herbst
v2: remove TGSI related bits

Signed-off-by: Karol Herbst <kher...@redhat.com>
Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/Makefile.sources   |   2 +
 .../nouveau/codegen/nv50_ir_from_common.cpp| 107 +
 .../drivers/nouveau/codegen/nv50_ir_from_common.h  |  58 +++
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 106 +---
 src/gallium/drivers/nouveau/meson.build|   2 +
 5 files changed, 172 insertions(+), 103 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index 65f08c7d8d8..fee5e59522e 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -115,6 +115,8 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_build_util.h \
codegen/nv50_ir_driver.h \
codegen/nv50_ir_emit_nv50.cpp \
+   codegen/nv50_ir_from_common.cpp \
+   codegen/nv50_ir_from_common.h \
codegen/nv50_ir_from_tgsi.cpp \
codegen/nv50_ir_graph.cpp \
codegen/nv50_ir_graph.h \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
new file mode 100644
index 000..0ad6087e588
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "codegen/nv50_ir_from_common.h"
+
+namespace nv50_ir {
+
+ConverterCommon::ConverterCommon(Program *prog, nv50_ir_prog_info *info)
+   :  BuildUtil(prog),
+  info(info) {}
+
+ConverterCommon::Subroutine *
+ConverterCommon::getSubroutine(unsigned ip)
+{
+   std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
+
+   if (it == sub.map.end())
+  it = sub.map.insert(std::make_pair(
+  ip, Subroutine(new Function(prog, "SUB", ip)))).first;
+
+   return &it->second;
+}
+
+ConverterCommon::Subroutine *
+ConverterCommon::getSubroutine(Function *f)
+{
+   unsigned ip = f->getLabel();
+   std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
+
+   if (it == sub.map.end())
+  it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
+
+   return &it->second;
+}
+
+uint8_t
+ConverterCommon::translateInterpMode(const struct nv50_ir_varying *var, 
operation& op)
+{
+   uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
+
+   if (var->flat)
+  mode = NV50_IR_INTERP_FLAT;
+   else
+   if (var->linear)
+  mode = NV50_IR_INTERP_LINEAR;
+   else
+   if (var->sc)
+  mode = NV50_IR_INTERP_SC;
+
+   op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
+  ? OP_PINTERP : OP_LINTERP;
+
+   if (var->centroid)
+  mode |= NV50_IR_INTERP_CENTROID;
+
+   return mode;
+}
+
+void
+ConverterCommon::handleUserClipPlanes()
+{
+   Value *res[8];
+   int n, i, c;
+
+   for (c = 0; c < 4; ++c) {
+  for (i = 0; i < info->io.genUserClip; ++i) {
+ Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
+TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
+ Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
+ if (c == 0)
+res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
+ else
+mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
+  }
+   }
+
+   const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
+
+   for (i = 0; i < info->io.genUserClip; ++i) {
+  n = i / 4 + first;
+  c = i % 4;
+  Symbol *sym =
+ mkSymbol(FILE_SHADER_OUTPUT, 0, TYP

[Mesa-dev] [PATCH v7 06/35] nouveau: add support for nir

2018-04-16 Thread Karol Herbst
not all those nir options are actually required, it just made the work a
little easier.

v2: fix asserts
parse compute shaders
don't lower bitfield_insert
v3: fix memory leak
v4: don't lower fmod32
v5: set lower_all_io_to_temps to false
fix memory leak because we take over ownership of the nir shader
merge: use the lowering helper
v6: include TGSI debug header for proper assert call
add nv50 support
v7: fix Automake build

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/gallium/drivers/nouveau/Automake.inc   |  3 +
 src/gallium/drivers/nouveau/Makefile.am|  5 ++
 src/gallium/drivers/nouveau/Makefile.sources   |  1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp|  3 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.h  |  1 +
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 76 ++
 src/gallium/drivers/nouveau/meson.build|  9 +--
 src/gallium/drivers/nouveau/nv50/nv50_program.c| 19 +-
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 40 
 src/gallium/drivers/nouveau/nv50/nv50_state.c  | 31 -
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c| 18 -
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 42 +++-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c  | 27 +++-
 13 files changed, 261 insertions(+), 14 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

diff --git a/src/gallium/drivers/nouveau/Automake.inc 
b/src/gallium/drivers/nouveau/Automake.inc
index 1d383fcb7b1..657790494dc 100644
--- a/src/gallium/drivers/nouveau/Automake.inc
+++ b/src/gallium/drivers/nouveau/Automake.inc
@@ -8,4 +8,7 @@ TARGET_LIB_DEPS += \
$(NOUVEAU_LIBS) \
$(LIBDRM_LIBS)
 
+TARGET_COMPILER_LIB_DEPS = \
+   $(top_builddir)/src/compiler/nir/libnir.la
+
 endif
diff --git a/src/gallium/drivers/nouveau/Makefile.am 
b/src/gallium/drivers/nouveau/Makefile.am
index f6126b54481..478dfcf437b 100644
--- a/src/gallium/drivers/nouveau/Makefile.am
+++ b/src/gallium/drivers/nouveau/Makefile.am
@@ -25,6 +25,10 @@ include $(top_srcdir)/src/gallium/Automake.inc
 
 AM_CPPFLAGS = \
-I$(top_srcdir)/include/drm-uapi \
+   -I$(top_builddir)/src/compiler/nir \
+   -I$(top_srcdir)/src/compiler/nir \
+   -I$(top_srcdir)/src/mapi \
+   -I$(top_srcdir)/src/mesa \
$(GALLIUM_DRIVER_CFLAGS) \
$(LIBDRM_CFLAGS) \
$(NOUVEAU_CFLAGS)
@@ -47,6 +51,7 @@ nouveau_compiler_SOURCES = \
 
 nouveau_compiler_LDADD = \
libnouveau.la \
+   $(top_builddir)/src/compiler/nir/libnir.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/util/libmesautil.la \
$(GALLIUM_COMMON_LIB_DEPS)
diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index ec344c63169..c6a1aff7110 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -117,6 +117,7 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_emit_nv50.cpp \
codegen/nv50_ir_from_common.cpp \
codegen/nv50_ir_from_common.h \
+   codegen/nv50_ir_from_nir.cpp \
codegen/nv50_ir_from_tgsi.cpp \
codegen/nv50_ir_graph.cpp \
codegen/nv50_ir_graph.h \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index c987da99085..b3efef72b0f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1231,6 +1231,9 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
prog->optLevel = info->optLevel;
 
switch (info->bin.sourceRep) {
+   case PIPE_SHADER_IR_NIR:
+  ret = prog->makeFromNIR(info) ? 0 : -2;
+  break;
case PIPE_SHADER_IR_TGSI:
   ret = prog->makeFromTGSI(info) ? 0 : -2;
   break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index f4f3c708886..e5b4592a61e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -1255,6 +1255,7 @@ public:
inline void del(Function *fn, int& id) { allFuncs.remove(id); }
inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }
 
+   bool makeFromNIR(struct nv50_ir_prog_info *);
bool makeFromTGSI(struct nv50_ir_prog_info *);
bool convertToSSA();
bool optimizeSSA(int level);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
new file mode 100644
index 000..b22c62fd434
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2017 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files 

[Mesa-dev] [PATCH v7 00/35] Nouveau NIR support

2018-04-16 Thread Karol Herbst
Changes to v6:
* fixed automake build
* fixed shader cache collision with TGSI shaders
* fixed handling of inot
* fixed location of images
* fixed load_output of fragment shaders (FBFETCH)
* added load_barycentric_sample handling
* some preparation for bindless_texture support

There will be some follow up patches to add support for:
* 4 constant offsets in TG4
* bindless_texture support for samplers and images

Review is important for patches 1-8, all the other patches touch the from_nir
file only.

Connor Abbott (1):
  nv50/ir/ra: Fix copying compound for moves

Karol Herbst (34):
  st/glsl_to_nir: run lower_output_reads on
!PIPE_CAP_TGSI_CAN_READ_OUTPUTS
  nvir: print the shader type when dumping headers
  nvir: move common converter code in base class
  nvir: add lowering helper
  nouveau: add support for nir
  nouveau: add env var to make nir default
  nouveau: fix nir and TGSI shader cache collision
  nvir/nir: run some passes to make the conversion easier
  nvir/nir: track defs and provide easy access functions
  nvir/nir: add nir type helper functions
  nvir/nir: run assignSlots
  nvir/nir: add loadFrom and storeTo helpler
  nvir/nir: parse NIR shader info
  nvir/nir: implement CFG handling
  nvir/nir: implement nir_load_const_instr
  nvir/nir: add skeleton for nir_intrinsic_instr
  nvir/nir: implement nir_alu_instr handling
  nvir/nir: implement nir_intrinsic_load_uniform
  nvir/nir: implement nir_intrinsic_store_(per_vertex_)output
  nvir/nir: implement load_(interpolated_)input/output
  nvir/nir: implement intrinsic_discard(_if)
  nvir/nir: implement loading system values
  nvir/nir: implement nir_ssa_undef_instr
  nvir/nir: implement nir_instr_type_tex
  nvir/nir: add getOperation for intrinsics
  nvir/nir: implement vote and ballot
  nvir/nir: implement variable indexing
  nvir/nir: implement geometry shader nir_intrinsics
  nvir/nir: implement nir_intrinsic_load_ubo
  nvir/nir: implement ssbo intrinsics
  nvir/nir: implement images
  nvir/nir: add memory barriers
  nvir/nir: implement load_per_vertex_output
  nvir/nir: implement intrinsic shader_clock

 src/gallium/drivers/nouveau/Automake.inc   |3 +
 src/gallium/drivers/nouveau/Makefile.am|5 +
 src/gallium/drivers/nouveau/Makefile.sources   |5 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp|3 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.h  |1 +
 .../nouveau/codegen/nv50_ir_from_common.cpp|  107 +
 .../drivers/nouveau/codegen/nv50_ir_from_common.h  |   58 +
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 3145 
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  106 +-
 .../nouveau/codegen/nv50_ir_lowering_helper.cpp|  275 ++
 .../nouveau/codegen/nv50_ir_lowering_helper.h  |   53 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp |   60 +-
 src/gallium/drivers/nouveau/meson.build|   13 +-
 src/gallium/drivers/nouveau/nouveau_screen.c   |   11 +-
 src/gallium/drivers/nouveau/nouveau_screen.h   |2 +
 src/gallium/drivers/nouveau/nv50/nv50_program.c|   19 +-
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |   44 +-
 src/gallium/drivers/nouveau/nv50/nv50_state.c  |   31 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c|   19 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |   61 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c  |   27 +-
 src/mesa/state_tracker/st_glsl_to_nir.cpp  |6 +
 22 files changed, 3908 insertions(+), 146 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
 create mode 100644 
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp
 create mode 100644 
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.h

-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 02/35] nv50/ir/ra: Fix copying compound for moves

2018-04-16 Thread Karol Herbst
From: Connor Abbott <cwabbo...@gmail.com>

In order to reduce moves when coalescing multiple registers into a
larger register, RA will try to coalesce MERGE instructions with their
definitions. For example, for something like this in GLSL:

uint a = ...;
uint b = ...;
uint64 x = packUint2x32(a, b);

The compiler will try to coalesce x with a and b, in the same way as
something like:

uint a = ...;
uint b = ...;
...
uint x = phi(a, b);

with the crucial difference that the definitions of a and b only clobber
part of the register, instead of the whole thing. This information is
carried through the compound flag and compMask bitmask. If compound is
set, then the value has been coalesced in such a way that not all the
defs clobber the entire register. The compMask bitmask describes which
subregister each def clobbers, although it does it in a slightly
convoluted way. It's an invariant that once compound is set on one def,
it must be set for all the defs in a given coalesced value.

In more detail, the constraints pass will first create extra moves:

uint a = ...;
uint b = ...;
uint a' = a;
uint b' = b;
uint64 x = packUint2x32(a', b');

and then RA will merge values involved in MERGE/SPLIT instructions,
merging x with a' and b' and making the combined value compound -- this
is relatively simple, and will always succeed since we just created a'
and b', so they never interfere with x, and x has no other definitions,
since we haven't started coalescing moves yet. Basically, we just replaced
the MERGE instruction with an equivalent sequence of partial writes to the
destination. The tricky part comes when we try to merge a' with a
and b' with b. We need to transfer the compound information from a' to a
and b' to b, which copyCompound() does, but we also need to transfer it
to any defs coalesced with a and b, which the code failed to do. Similarly,
if x is the argument to a phi instruction, then when we try to merge it
with other arguments to the same phi by coalescing moves, we'd have
problems guaranteeing that all the other merged defs stay up-to-date.

One tricky part of fixing this is that in order to properly propagate
the information from a' to a, we need to do it before the defs for a and
a' are merged in coalesceValues(), since we need to know which defs are
merged with a but not a' -- after coalesceValues() returns, all the defs
have been combined, so we don't know which is which. I took the approach
of calling copyCompound() inside coalesceValues(), instead of
afterwards.

Cc: Ilia Mirkin <imir...@alum.mit.edu>
Cc: Karol Herbst <kher...@redhat.com>
Tested-by: Karol Herbst <kher...@redhat.com>
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 60 ++
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 3a0e56e1385..df3116a6d73 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -890,6 +890,35 @@ GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval)
livei.insert(lval->livei);
 }
 
+// Used when coalescing moves. The non-compound value will become one, e.g.:
+// mov b32 $r0 $r2/ merge b64 $r0d { $r0 $r1 }
+// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d
+static inline void copyCompound(Value *dst, Value *src)
+{
+   LValue *ldst = dst->asLValue();
+   LValue *lsrc = src->asLValue();
+
+   if (ldst->compound && !lsrc->compound) {
+  LValue *swap = lsrc;
+  lsrc = ldst;
+  ldst = swap;
+   }
+
+   assert(!ldst->compound);
+
+   if (lsrc->compound) {
+  Value *dstRep = ldst->join;
+  for (Value::DefIterator d = dstRep->defs.begin(); d != 
dstRep->defs.end();
+   ++d) {
+ LValue *ldst = (*d)->get()->asLValue();
+ if (!ldst->compound)
+ldst->compMask = 0xff;
+ ldst->compound = 1;
+ ldst->compMask &= lsrc->compMask;
+  }
+   }
+}
+
 bool
 GCRA::coalesceValues(Value *dst, Value *src, bool force)
 {
@@ -932,9 +961,16 @@ GCRA::coalesceValues(Value *dst, Value *src, bool force)
if (!force && nRep->livei.overlaps(nVal->livei))
   return false;
 
+   // TODO: Handle this case properly.
+   if (!force && rep->compound && val->compound)
+  return false;
+
INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n",
 rep->id, rep->reg.data.id, val->id);
 
+   if (!force)
+  copyCompound(dst, src);
+
// set join pointer of all values joined with val
for (Value::DefIterator def = val->defs.begin(); def != val->defs.end();
 ++def)
@@ -997,24 +1033,6 @@ static inline uint8_t makeCompMask(int compSize, int 
base, int size)
}
 }
 
-// Used when 

[Mesa-dev] [PATCH v7 01/35] st/glsl_to_nir: run lower_output_reads on !PIPE_CAP_TGSI_CAN_READ_OUTPUTS

2018-04-16 Thread Karol Herbst
this is required for Drivers which don't allow reading from outputs.

Reviewed-by: Timothy Arceri <tarc...@itsqueeze.com>
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/mesa/state_tracker/st_glsl_to_nir.cpp | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index bcf6a7ceb6a..6502aec370f 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -44,6 +44,7 @@
 #include "compiler/glsl_types.h"
 #include "compiler/glsl/glsl_to_nir.h"
 #include "compiler/glsl/ir.h"
+#include "compiler/glsl/ir_optimization.h"
 #include "compiler/glsl/string_to_uint_map.h"
 
 
@@ -553,6 +554,7 @@ st_nir_get_mesa_program(struct gl_context *ctx,
 struct gl_linked_shader *shader)
 {
struct st_context *st = st_context(ctx);
+   struct pipe_screen *pscreen = ctx->st->pipe->screen;
struct gl_program *prog;
 
validate_ir_tree(shader->ir);
@@ -565,6 +567,10 @@ st_nir_get_mesa_program(struct gl_context *ctx,
_mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
prog->Parameters);
 
+   /* Remove reads from output registers. */
+   if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS))
+  lower_output_reads(shader->Stage, shader->ir);
+
if (ctx->_Shader->Flags & GLSL_DUMP) {
   _mesa_log("\n");
   _mesa_log("GLSL IR for linked %s program %d:\n",
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 07/35] nouveau: add env var to make nir default

2018-04-16 Thread Karol Herbst
v2: allow for non debug builds as well
v3: move reading out env var more global
disable tg4 with multiple offsets with nir
disable caps for 64 bit types
v6: nv50 support
disable MS images
disable bindless textures

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/gallium/drivers/nouveau/nouveau_screen.c   |  5 +
 src/gallium/drivers/nouveau/nouveau_screen.h   |  2 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |  4 +++-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 19 +--
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c 
b/src/gallium/drivers/nouveau/nouveau_screen.c
index c144b39b2dd..2598c78a45b 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -176,9 +176,14 @@ nouveau_screen_init(struct nouveau_screen *screen, struct 
nouveau_device *dev)
union nouveau_bo_config mm_config;
 
char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
+   char *use_nir = getenv("NV50_PROG_USE_NIR");
+
if (nv_dbg)
   nouveau_mesa_debug = atoi(nv_dbg);
 
+   if (use_nir)
+  screen->prefer_nir = strtol(use_nir, NULL, 0) == 1;
+
/* These must be set before any failure is possible, as the cleanup
 * paths assume they're responsible for deleting them.
 */
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h 
b/src/gallium/drivers/nouveau/nouveau_screen.h
index e4fbae99ca4..1229b66b26f 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -62,6 +62,8 @@ struct nouveau_screen {
 
struct disk_cache *disk_shader_cache;
 
+   bool prefer_nir;
+
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
union {
   uint64_t v[29];
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index e91ea8d08c1..6f0a30ea026 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -311,6 +311,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen,
  enum pipe_shader_type shader,
  enum pipe_shader_cap param)
 {
+   const struct nouveau_screen *screen = nouveau_screen(pscreen);
+
switch (shader) {
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
@@ -364,7 +366,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
   return MIN2(16, PIPE_MAX_SAMPLERS);
case PIPE_SHADER_CAP_PREFERRED_IR:
-  return PIPE_SHADER_IR_TGSI;
+  return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
   return 32;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 15662093eb6..1f558aeaf4b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -90,9 +90,11 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
 
if (bindings & PIPE_BIND_SHADER_IMAGE) {
   if (sample_count > 0 &&
-  nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS) {
+  (nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS ||
+   nouveau_screen(pscreen)->prefer_nir)) {
  /* MS images are currently unsupported on Maxwell because they have to
   * be handled explicitly. */
+ /* MS images are currently unsupported with NIR */
  return false;
   }
 
@@ -112,7 +114,8 @@ static int
 nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 {
const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
-   struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+   const struct nouveau_screen *screen = nouveau_screen(pscreen);
+   struct nouveau_device *dev = screen->device;
 
switch (param) {
/* non-boolean caps */
@@ -216,7 +219,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
-   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
@@ -257,6 +259,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_QUERY_SO_OVERFLOW:
   return 1;
+   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+  /* TODO: nir doesn't support tg4 with multiple offsets */
+  return screen->prefer_nir ? 0 : 1;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
   return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
case PIPE_CAP_TGSI_

Re: [Mesa-dev] [PATCH v2 2/3] nir: add support for bindless_texture samplers

2018-04-12 Thread Karol Herbst
On Thu, Apr 12, 2018 at 6:33 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> On Thu, Apr 12, 2018 at 7:36 AM, Karol Herbst <kher...@redhat.com> wrote:
>>
>> On Tue, Apr 10, 2018 at 5:10 PM, Jason Ekstrand <ja...@jlekstrand.net>
>> wrote:
>> > On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst <kher...@redhat.com>
>> > wrote:
>> >>
>> >> v2: add both texture and sampler handles
>> >>
>> >> Signed-off-by: Karol Herbst <kher...@redhat.com>
>> >> ---
>> >>  src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
>> >>  src/compiler/nir/nir.h|  2 ++
>> >>  src/compiler/nir/nir_print.c  |  6 ++
>> >>  3 files changed, 23 insertions(+), 2 deletions(-)
>> >>
>> >> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
>> >> b/src/compiler/glsl/glsl_to_nir.cpp
>> >> index dbb58d82e8f..9f233637306 100644
>> >> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> >> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> >> @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
>> >>  {
>> >> unsigned num_srcs;
>> >> nir_texop op;
>> >> +   bool bindless =
>> >> ir->sampler->variable_referenced()->contains_bindless();
>> >
>> >
>> > What happens if I have a uniform struct containing both a regular
>> > sampler
>> > and a bindless sampler?  I think this should be possible.
>> >
>>
>> well currently mesa just fails to compile, but even if it would I
>> don't see a way how we know with a ir_dereference if we reference a
>> bindless or bound sampler.
>>
>> The glsl_type doesn't tell us either and maybe it makes sense to add a
>> is_bindless method to glsl_type so that we can use it in places like
>> here? ir->sampler->type gives me the sampler type, but lacks the
>> information if it is bindless or not. Any thoughts?
>
>
> That seems like it's probably reasonable.  I'm not sure if we really want
> different types.  Another option would be to handle it as a layout qualifier
> on the structure type fields.  I'm not sure which is better.
>

I think we should add a field and add an is_opaque method to fix
glsl_type::contains_opaque, which is also broken, but we could do that
with a new type as well :(

>>
>> >>
>> >> +
>> >> switch (ir->op) {
>> >> case ir_tex:
>> >>op = nir_texop_tex;
>> >> @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
>> >>num_srcs++;
>> >> if (ir->offset != NULL)
>> >>num_srcs++;
>> >> +   if (bindless)
>> >> +  num_srcs++;
>> >>
>> >> nir_tex_instr *instr = nir_tex_instr_create(this->shader,
>> >> num_srcs);
>> >>
>> >> @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
>> >>unreachable("not reached");
>> >> }
>> >>
>> >> -   instr->texture = evaluate_deref(>instr, ir->sampler);
>> >> -
>> >> unsigned src_number = 0;
>> >>
>> >> +   /* for bindless we use the texture handle src */
>> >> +   if (bindless) {
>> >> +  instr->texture = NULL;
>> >> +  instr->src[src_number].src =
>> >> + nir_src_for_ssa(evaluate_rvalue(ir->sampler));
>> >> +  instr->src[src_number].src_type = nir_tex_src_texture_handle;
>> >> +  src_number++;
>> >> +   } else {
>> >> +  instr->texture = evaluate_deref(>instr, ir->sampler);
>> >> +   }
>> >> +
>> >> if (ir->coordinate != NULL) {
>> >>instr->coord_components = ir->coordinate->type->vector_elements;
>> >>instr->src[src_number].src =
>> >> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> >> index f33049d7134..e395352f89c 100644
>> >> --- a/src/compiler/nir/nir.h
>> >> +++ b/src/compiler/nir/nir.h
>> >> @@ -1218,6 +1218,8 @@ typedef enum {
>> >> nir_tex_src_texture_offset, /* < dynamically uniform indirect
>> >> offset
>> >> */
>> >> nir_tex_src_sampler_offset, /* < dynamically uniform indirect
>> >> offset
>> >> */
>> >> nir_tex_src_plane,  /* < selects plane for planar textures
>> >> */
>> >> +   nir_tex_src_texture_handle, /* < handle for bindless texture */
>> >> +   nir_tex_src_sampler_handle, /* < handle for bindless sampler */
>> >> nir_num_tex_src_types
>> >>  } nir_tex_src_type;
>> >>
>> >> diff --git a/src/compiler/nir/nir_print.c
>> >> b/src/compiler/nir/nir_print.c
>> >> index 21f13097651..52f20b1eb10 100644
>> >> --- a/src/compiler/nir/nir_print.c
>> >> +++ b/src/compiler/nir/nir_print.c
>> >> @@ -778,6 +778,12 @@ print_tex_instr(nir_tex_instr *instr, print_state
>> >> *state)
>> >>case nir_tex_src_plane:
>> >>   fprintf(fp, "(plane)");
>> >>   break;
>> >> +  case nir_tex_src_texture_handle:
>> >> + fprintf(fp, "(texture_handle)");
>> >> + break;
>> >> +  case nir_tex_src_sampler_handle:
>> >> + fprintf(fp, "(sampler_handle)");
>> >> + break;
>> >>
>> >>default:
>> >>   unreachable("Invalid texture source type");
>> >> --
>> >> 2.14.3
>> >>
>> >
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/3] nir: add support for bindless_texture samplers

2018-04-12 Thread Karol Herbst
On Tue, Apr 10, 2018 at 5:10 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst <kher...@redhat.com> wrote:
>>
>> v2: add both texture and sampler handles
>>
>> Signed-off-by: Karol Herbst <kher...@redhat.com>
>> ---
>>  src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
>>  src/compiler/nir/nir.h|  2 ++
>>  src/compiler/nir/nir_print.c  |  6 ++
>>  3 files changed, 23 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
>> b/src/compiler/glsl/glsl_to_nir.cpp
>> index dbb58d82e8f..9f233637306 100644
>> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
>>  {
>> unsigned num_srcs;
>> nir_texop op;
>> +   bool bindless =
>> ir->sampler->variable_referenced()->contains_bindless();
>
>
> What happens if I have a uniform struct containing both a regular sampler
> and a bindless sampler?  I think this should be possible.
>

Well, currently Mesa just fails to compile, but even if it did, I
don't see how we could tell from an ir_dereference whether we reference
a bindless or a bound sampler.

The glsl_type doesn't tell us either, so maybe it makes sense to add an
is_bindless method to glsl_type so that we can use it in places like
this one? ir->sampler->type gives me the sampler type, but lacks the
information whether it is bindless or not. Any thoughts?

>>
>> +
>> switch (ir->op) {
>> case ir_tex:
>>op = nir_texop_tex;
>> @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
>>num_srcs++;
>> if (ir->offset != NULL)
>>num_srcs++;
>> +   if (bindless)
>> +  num_srcs++;
>>
>> nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
>>
>> @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
>>unreachable("not reached");
>> }
>>
>> -   instr->texture = evaluate_deref(>instr, ir->sampler);
>> -
>> unsigned src_number = 0;
>>
>> +   /* for bindless we use the texture handle src */
>> +   if (bindless) {
>> +  instr->texture = NULL;
>> +  instr->src[src_number].src =
>> + nir_src_for_ssa(evaluate_rvalue(ir->sampler));
>> +  instr->src[src_number].src_type = nir_tex_src_texture_handle;
>> +  src_number++;
>> +   } else {
>> +  instr->texture = evaluate_deref(>instr, ir->sampler);
>> +   }
>> +
>> if (ir->coordinate != NULL) {
>>instr->coord_components = ir->coordinate->type->vector_elements;
>>instr->src[src_number].src =
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index f33049d7134..e395352f89c 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -1218,6 +1218,8 @@ typedef enum {
>> nir_tex_src_texture_offset, /* < dynamically uniform indirect offset
>> */
>> nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset
>> */
>> nir_tex_src_plane,  /* < selects plane for planar textures */
>> +   nir_tex_src_texture_handle, /* < handle for bindless texture */
>> +   nir_tex_src_sampler_handle, /* < handle for bindless sampler */
>> nir_num_tex_src_types
>>  } nir_tex_src_type;
>>
>> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
>> index 21f13097651..52f20b1eb10 100644
>> --- a/src/compiler/nir/nir_print.c
>> +++ b/src/compiler/nir/nir_print.c
>> @@ -778,6 +778,12 @@ print_tex_instr(nir_tex_instr *instr, print_state
>> *state)
>>case nir_tex_src_plane:
>>   fprintf(fp, "(plane)");
>>   break;
>> +  case nir_tex_src_texture_handle:
>> + fprintf(fp, "(texture_handle)");
>> + break;
>> +  case nir_tex_src_sampler_handle:
>> + fprintf(fp, "(sampler_handle)");
>> + break;
>>
>>default:
>>   unreachable("Invalid texture source type");
>> --
>> 2.14.3
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-10 Thread Karol Herbst
On Tue, Apr 10, 2018 at 6:01 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> On Tue, Apr 10, 2018 at 8:35 AM, Karol Herbst <kher...@redhat.com> wrote:
>>
>> On Tue, Apr 10, 2018 at 5:12 PM, Jason Ekstrand <ja...@jlekstrand.net>
>> wrote:
>> > I still don't see anything to make nir_validate not fail out on you if
>> > it
>> > sees a read or a write to/from an IMAGE or SAMPLER.
>> >
>>
>> what kind of glsl code are you talking about here? I wrote some tests
>> and things just seem to work out. I wasn't able to hit any other
>> issues.
>
>
> Were they tests where GLSL was able to copy propagate such that NIR never
> saw a write to the image/sampler variable?
>

Well the trivial one is where you directly consume the uniform.

>>
>> > On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst <kher...@redhat.com>
>> > wrote:
>> >>
>> >> v2: fix assertion for bindless to non bindless assignments
>> >>
>> >> Signed-off-by: Karol Herbst <kher...@redhat.com>
>> >> ---
>> >>  src/compiler/nir/nir_split_var_copies.c | 8 +++-
>> >>  1 file changed, 7 insertions(+), 1 deletion(-)
>> >>
>> >> diff --git a/src/compiler/nir/nir_split_var_copies.c
>> >> b/src/compiler/nir/nir_split_var_copies.c
>> >> index bc3ceedbdb8..e592754d770 100644
>> >> --- a/src/compiler/nir/nir_split_var_copies.c
>> >> +++ b/src/compiler/nir/nir_split_var_copies.c
>> >> @@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct
>> >> split_var_copies_state *state)
>> >>nir_deref_var *src_head = intrinsic->variables[1];
>> >>nir_deref *dest_tail = nir_deref_tail(_head->deref);
>> >>nir_deref *src_tail = nir_deref_tail(_head->deref);
>> >> +  enum glsl_base_type base_type =
>> >> glsl_get_base_type(src_tail->type);
>> >>
>> >> -  switch (glsl_get_base_type(src_tail->type)) {
>> >> +  switch (base_type) {
>> >>case GLSL_TYPE_ARRAY:
>> >>case GLSL_TYPE_STRUCT:
>> >>   split_var_copy_instr(intrinsic, dest_head, src_head,
>> >> @@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct
>> >> split_var_copies_state *state)
>> >>  ralloc_steal(state->dead_ctx, instr);
>> >>   }
>> >>   break;
>> >> +  /* for bindless those are uint64 */
>> >> +  case GLSL_TYPE_IMAGE:
>> >> +  case GLSL_TYPE_SAMPLER:
>> >> + assert(src_head->var->data.bindless ||
>> >> +glsl_get_base_type(src_head->var->type) == base_type);
>> >>case GLSL_TYPE_INT:
>> >>case GLSL_TYPE_UINT:
>> >>case GLSL_TYPE_INT16:
>> >> --
>> >> 2.14.3
>> >>
>> >
>> >
>> > ___
>> > mesa-dev mailing list
>> > mesa-dev@lists.freedesktop.org
>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> >
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 3/3] glsl/nir: fix variable type for image intrinsics and ubos

2018-04-10 Thread Karol Herbst
On Tue, Apr 10, 2018 at 5:11 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst <kher...@redhat.com> wrote:
>>
>> If the bindless image is passed through a struct we ended up getting the
>> glsl_type of the struct, not the image.
>>
>> variable_referenced points to the declaration of the struct, so it won't
>> work
>> for bindless images. So just drop it.
>>
>> Signed-off-by: Karol Herbst <kher...@redhat.com>
>> ---
>>  src/compiler/glsl/glsl_to_nir.cpp | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
>> b/src/compiler/glsl/glsl_to_nir.cpp
>> index 9f233637306..bb9ba3af04a 100644
>> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> @@ -893,7 +893,7 @@ nir_visitor::visit(ir_call *ir)
>>   exec_node *param = ir->actual_parameters.get_head();
>>   ir_dereference *image = (ir_dereference *)param;
>>   const glsl_type *type =
>> -image->variable_referenced()->type->without_array();
>> +image->type->without_array();
>
>
> I asked this question on the last version as well: Do we really need
> without_array()?
>

I don't think so actually, because it should be the sampler type
already. I just forgot about that.

>>
>>   instr->variables[0] = evaluate_deref(>instr, image);
>>   param = param->get_next();
>> --
>> 2.14.3
>>
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-10 Thread Karol Herbst
On Tue, Apr 10, 2018 at 5:12 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> I still don't see anything to make nir_validate not fail out on you if it
> sees a read or a write to/from an IMAGE or SAMPLER.
>

what kind of glsl code are you talking about here? I wrote some tests
and things just seem to work out. I wasn't able to hit any other
issues.

> On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst <kher...@redhat.com> wrote:
>>
>> v2: fix assertion for bindless to non bindless assignments
>>
>> Signed-off-by: Karol Herbst <kher...@redhat.com>
>> ---
>>  src/compiler/nir/nir_split_var_copies.c | 8 +++-
>>  1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/compiler/nir/nir_split_var_copies.c
>> b/src/compiler/nir/nir_split_var_copies.c
>> index bc3ceedbdb8..e592754d770 100644
>> --- a/src/compiler/nir/nir_split_var_copies.c
>> +++ b/src/compiler/nir/nir_split_var_copies.c
>> @@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct
>> split_var_copies_state *state)
>>nir_deref_var *src_head = intrinsic->variables[1];
>>nir_deref *dest_tail = nir_deref_tail(_head->deref);
>>nir_deref *src_tail = nir_deref_tail(_head->deref);
>> +  enum glsl_base_type base_type = glsl_get_base_type(src_tail->type);
>>
>> -  switch (glsl_get_base_type(src_tail->type)) {
>> +  switch (base_type) {
>>case GLSL_TYPE_ARRAY:
>>case GLSL_TYPE_STRUCT:
>>   split_var_copy_instr(intrinsic, dest_head, src_head,
>> @@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct
>> split_var_copies_state *state)
>>  ralloc_steal(state->dead_ctx, instr);
>>   }
>>   break;
>> +  /* for bindless those are uint64 */
>> +  case GLSL_TYPE_IMAGE:
>> +  case GLSL_TYPE_SAMPLER:
>> + assert(src_head->var->data.bindless ||
>> +glsl_get_base_type(src_head->var->type) == base_type);
>>case GLSL_TYPE_INT:
>>case GLSL_TYPE_UINT:
>>case GLSL_TYPE_INT16:
>> --
>> 2.14.3
>>
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/3] nir: add support for bindless_texture samplers

2018-04-10 Thread Karol Herbst
v2: add both texture and sampler handles

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
 src/compiler/nir/nir.h|  2 ++
 src/compiler/nir/nir_print.c  |  6 ++
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index dbb58d82e8f..9f233637306 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
 {
unsigned num_srcs;
nir_texop op;
+   bool bindless = ir->sampler->variable_referenced()->contains_bindless();
+
switch (ir->op) {
case ir_tex:
   op = nir_texop_tex;
@@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
   num_srcs++;
if (ir->offset != NULL)
   num_srcs++;
+   if (bindless)
+  num_srcs++;
 
nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
 
@@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
   unreachable("not reached");
}
 
-   instr->texture = evaluate_deref(&instr->instr, ir->sampler);
-
unsigned src_number = 0;
 
+   /* for bindless we use the texture handle src */
+   if (bindless) {
+  instr->texture = NULL;
+  instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->sampler));
+  instr->src[src_number].src_type = nir_tex_src_texture_handle;
+  src_number++;
+   } else {
+  instr->texture = evaluate_deref(&instr->instr, ir->sampler);
+   }
+
if (ir->coordinate != NULL) {
   instr->coord_components = ir->coordinate->type->vector_elements;
   instr->src[src_number].src =
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f33049d7134..e395352f89c 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1218,6 +1218,8 @@ typedef enum {
nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
nir_tex_src_plane,  /* < selects plane for planar textures */
+   nir_tex_src_texture_handle, /* < handle for bindless texture */
+   nir_tex_src_sampler_handle, /* < handle for bindless sampler */
nir_num_tex_src_types
 } nir_tex_src_type;
 
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 21f13097651..52f20b1eb10 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -778,6 +778,12 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
   case nir_tex_src_plane:
  fprintf(fp, "(plane)");
  break;
+  case nir_tex_src_texture_handle:
+ fprintf(fp, "(texture_handle)");
+ break;
+  case nir_tex_src_sampler_handle:
+ fprintf(fp, "(sampler_handle)");
+ break;
 
   default:
  unreachable("Invalid texture source type");
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/3] glsl/nir: fix variable type for image intrinsics and ubos

2018-04-10 Thread Karol Herbst
If the bindless image is passed through a struct, we end up getting the
glsl_type of the struct, not that of the image.

variable_referenced() points to the declaration of the struct, so it won't
work for bindless images. So just drop it.

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/glsl/glsl_to_nir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 9f233637306..bb9ba3af04a 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -893,7 +893,7 @@ nir_visitor::visit(ir_call *ir)
  exec_node *param = ir->actual_parameters.get_head();
  ir_dereference *image = (ir_dereference *)param;
  const glsl_type *type =
-image->variable_referenced()->type->without_array();
+image->type->without_array();
 
  instr->variables[0] = evaluate_deref(&instr->instr, image);
  param = param->get_next();
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-10 Thread Karol Herbst
v2: fix assertion for bindless to non bindless assignments

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/nir/nir_split_var_copies.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_split_var_copies.c 
b/src/compiler/nir/nir_split_var_copies.c
index bc3ceedbdb8..e592754d770 100644
--- a/src/compiler/nir/nir_split_var_copies.c
+++ b/src/compiler/nir/nir_split_var_copies.c
@@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct 
split_var_copies_state *state)
   nir_deref_var *src_head = intrinsic->variables[1];
   nir_deref *dest_tail = nir_deref_tail(&dest_head->deref);
   nir_deref *src_tail = nir_deref_tail(&src_head->deref);
+  enum glsl_base_type base_type = glsl_get_base_type(src_tail->type);
 
-  switch (glsl_get_base_type(src_tail->type)) {
+  switch (base_type) {
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT:
  split_var_copy_instr(intrinsic, dest_head, src_head,
@@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct 
split_var_copies_state *state)
 ralloc_steal(state->dead_ctx, instr);
  }
  break;
+  /* for bindless those are uint64 */
+  case GLSL_TYPE_IMAGE:
+  case GLSL_TYPE_SAMPLER:
+ assert(src_head->var->data.bindless ||
+glsl_get_base_type(src_head->var->type) == base_type);
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT16:
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/3] nir: add support for ARB_bindless_texture texture handles

2018-04-10 Thread Karol Herbst
With this it should be possible to add support for texture handles for backends
using NIR.

changes since v1:
* dropped patch for image handles, still need to work on that

Karol Herbst (3):
  nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars
  nir: add support for bindless_texture samplers
  glsl/nir: fix variable type for image intrinsics and ubos

 src/compiler/glsl/glsl_to_nir.cpp   | 19 ---
 src/compiler/nir/nir.h  |  2 ++
 src/compiler/nir/nir_print.c|  6 ++
 src/compiler/nir/nir_split_var_copies.c |  8 +++-
 4 files changed, 31 insertions(+), 4 deletions(-)

-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: properly handle bindless sampler and image parameters

2018-04-10 Thread Karol Herbst
fixes a piglit test I sent to the list:
spec@arb_bindless_texture@execution@samplers@basic-arithmetic-func-call-uvec2-texture2D

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/glsl/opt_function_inlining.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/opt_function_inlining.cpp 
b/src/compiler/glsl/opt_function_inlining.cpp
index 04690b6cf45..3d00074bbc3 100644
--- a/src/compiler/glsl/opt_function_inlining.cpp
+++ b/src/compiler/glsl/opt_function_inlining.cpp
@@ -155,7 +155,7 @@ ir_call::generate_inline(ir_instruction *next_ir)
   ir_rvalue *param = (ir_rvalue *) actual_node;
 
   /* Generate a new variable for the parameter. */
-  if (sig_param->type->contains_opaque()) {
+  if (!sig_param->contains_bindless() && 
sig_param->type->contains_opaque()) {
 /* For opaque types, we want the inlined variable references
  * referencing the passed in variable, since that will have
  * the location information, which an assignment of an opaque
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] nv50/ir: make a copy of tex src if it's referenced multiple times

2018-04-10 Thread Karol Herbst
I guess this fixes a bug somewhere?

On Tue, Apr 10, 2018 at 6:11 AM, Ilia Mirkin  wrote:
> For nv50 we coalesce the srcs and defs into a single node. As such, we
> can end up with impossible constraints if the source is referenced
> after the tex operation (which, due to the coalescing of values, will
> have overwritten it).
>
> This logic already exists for inserting moves for MERGE/UNION sources.
> It's the exact same idea here, so leverage that code, which also
> includes a few optimizations around not extending live ranges
> unnecessarily.
>
> Signed-off-by: Ilia Mirkin 
> ---
>
> v1 -> v2: make use of existing logic in insertConstraintMoves
>
>  src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 86 
> --
>  1 file changed, 49 insertions(+), 37 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> index 3a0e56e1385..7d107aca68d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> @@ -257,6 +257,7 @@ private:
> private:
>virtual bool visit(BasicBlock *);
>
> +  void insertConstraintMove(Instruction *, int s);
>bool insertConstraintMoves();
>
>void condenseDefs(Instruction *);
> @@ -2216,6 +2217,8 @@ 
> RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex)
> for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) {
>if (!tex->srcExists(c))
>   tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue()));
> +  else
> + insertConstraintMove(tex, c);
>if (!tex->defExists(c))
>   tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue()));
> }
> @@ -2288,6 +2291,51 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
> return true;
>  }
>
> +void
> +RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int 
> s)
> +{
> +   const uint8_t size = cst->src(s).getSize();
> +
> +   assert(cst->getSrc(s)->defs.size() == 1); // still SSA
> +
> +   Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
> +   bool imm = defi->op == OP_MOV &&
> +  defi->src(0).getFile() == FILE_IMMEDIATE;
> +   bool load = defi->op == OP_LOAD &&
> +  defi->src(0).getFile() == FILE_MEMORY_CONST &&
> +  !defi->src(0).isIndirect(0);
> +   // catch some cases where don't really need MOVs
> +   if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) {
> +  if (imm || load) {
> + // Move the defi right before the cst. No point in expanding
> + // the range.
> + defi->bb->remove(defi);
> + cst->bb->insertBefore(cst, defi);
> +  }
> +  return;
> +   }
> +
> +   LValue *lval = new_LValue(func, cst->src(s).getFile());
> +   lval->reg.size = size;
> +
> +   Instruction *mov = new_Instruction(func, OP_MOV, typeOfSize(size));
> +   mov->setDef(0, lval);
> +   mov->setSrc(0, cst->getSrc(s));
> +
> +   if (load) {
> +  mov->op = OP_LOAD;
> +  mov->setSrc(0, defi->getSrc(0));
> +   } else if (imm) {
> +  mov->setSrc(0, defi->getSrc(0));
> +   }
> +
> +   if (defi->getPredicate())
> +  mov->setPredicate(defi->cc, defi->getPredicate());
> +
> +   cst->setSrc(s, mov->getDef(0));
> +   cst->bb->insertBefore(cst, mov);
> +}
> +
>  // Insert extra moves so that, if multiple register constraints on a value 
> are
>  // in conflict, these conflicts can be resolved.
>  bool
> @@ -2328,46 +2376,10 @@ 
> RegAlloc::InsertConstraintsPass::insertConstraintMoves()
> cst->bb->insertBefore(cst, mov);
> continue;
>  }
> -assert(cst->getSrc(s)->defs.size() == 1); // still SSA
> -
> -Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
> -bool imm = defi->op == OP_MOV &&
> -   defi->src(0).getFile() == FILE_IMMEDIATE;
> -bool load = defi->op == OP_LOAD &&
> -   defi->src(0).getFile() == FILE_MEMORY_CONST &&
> -   !defi->src(0).isIndirect(0);
> -// catch some cases where don't really need MOVs
> -if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) 
> {
> -   if (imm || load) {
> -  // Move the defi right before the cst. No point in 
> expanding
> -  // the range.
> -  defi->bb->remove(defi);
> -  cst->bb->insertBefore(cst, defi);
> -   }
> -   continue;
> -}
>
> -LValue *lval = new_LValue(func, cst->src(s).getFile());
> -lval->reg.size = size;
> -
> -mov = new_Instruction(func, OP_MOV, typeOfSize(size));
> -mov->setDef(0, lval);
> -mov->setSrc(0, cst->getSrc(s));
> -
> -if (load) {
> -   mov->op = OP_LOAD;
> -   mov->setSrc(0, defi->getSrc(0));
> -} 

Re: [Mesa-dev] [PATCH] RFC gallium: add 64 bit integer formats

2018-04-09 Thread Karol Herbst
On Tue, Apr 10, 2018 at 2:43 AM, Ilia Mirkin <imir...@alum.mit.edu> wrote:
> On Mon, Apr 9, 2018 at 8:39 PM, Karol Herbst <kher...@redhat.com> wrote:
>> unsigneds are needed by ARB_bindless_texture 64 bit vertex attribs, both for
>> NV_vertex_attrib_integer64.
>>
>> Fixes the new piglit sampler-vertex-attrib-input-output test I sent some days
>> ago for bindless_texture.
>>
>> The change inside vbo_attrtype_to_double_flag is what I am most concerned
>> about. Maybe I should add another flag for 64 bit ints. Or rework what 
>> Doubles
>> mean in gl_array_attributes. Or Rename that to is64Bit and rework all users 
>> of
>> Doubles.
>>
>> Any suggestions?
>>
>> Signed-off-by: Karol Herbst <kher...@redhat.com>
>> ---
>>  src/gallium/drivers/svga/svga_format.c |  8 
>>  src/gallium/include/pipe/p_format.h|  9 +
>>  src/mesa/main/glformats.c  |  3 +++
>>  src/mesa/state_tracker/st_atom_array.c | 30 +++---
>>  src/mesa/vbo/vbo_private.h |  2 +-
>>  5 files changed, 48 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/gallium/drivers/svga/svga_format.c 
>> b/src/gallium/drivers/svga/svga_format.c
>> index 20a6e6b159f..f01a0e79c72 100644
>> --- a/src/gallium/drivers/svga/svga_format.c
>> +++ b/src/gallium/drivers/svga/svga_format.c
>> @@ -369,6 +369,14 @@ static const struct vgpu10_format_entry 
>> format_conversion_table[] =
>> { PIPE_FORMAT_A1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> { PIPE_FORMAT_X1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> { PIPE_FORMAT_A4B4G4R4_UNORM,SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64_UINT,  SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64G64_UINT,   SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64G64B64_UINT,SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64G64B64A64_UINT, SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64_SINT,  SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64G64_SINT,   SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64G64B64_SINT,SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64G64B64A64_SINT, SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>>  };
>>
>>
>> diff --git a/src/gallium/include/pipe/p_format.h 
>> b/src/gallium/include/pipe/p_format.h
>> index 57399800fa4..df698856b70 100644
>> --- a/src/gallium/include/pipe/p_format.h
>> +++ b/src/gallium/include/pipe/p_format.h
>> @@ -396,6 +396,15 @@ enum pipe_format {
>> PIPE_FORMAT_X1B5G5R5_UNORM  = 310,
>> PIPE_FORMAT_A4B4G4R4_UNORM  = 311,
>>
>> +   PIPE_FORMAT_R64_UINT= 312,
>> +   PIPE_FORMAT_R64G64_UINT = 313,
>> +   PIPE_FORMAT_R64G64B64_UINT  = 314,
>> +   PIPE_FORMAT_R64G64B64A64_UINT   = 315,
>> +   PIPE_FORMAT_R64_SINT= 316,
>> +   PIPE_FORMAT_R64G64_SINT = 317,
>> +   PIPE_FORMAT_R64G64B64_SINT  = 318,
>> +   PIPE_FORMAT_R64G64B64A64_SINT   = 319,
>> +
>> PIPE_FORMAT_COUNT
>>  };
>>
>> diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
>> index 1e797c24c2a..feafd97f5ee 100644
>> --- a/src/mesa/main/glformats.c
>> +++ b/src/mesa/main/glformats.c
>> @@ -543,6 +543,9 @@ _mesa_bytes_per_vertex_attrib(GLint comps, GLenum type)
>> case GL_INT:
>> case GL_UNSIGNED_INT:
>>return comps * sizeof(GLint);
>> +   /* ARB_bindless_texture */
>> +   case GL_UNSIGNED_INT64_ARB:
>> +  return comps * sizeof(GLuint64EXT);
>> case GL_FLOAT:
>>return comps * sizeof(GLfloat);
>> case GL_HALF_FLOAT_ARB:
>> diff --git a/src/mesa/state_tracker/st_atom_array.c 
>> b/src/mesa/state_tracker/st_atom_array.c
>> index 2fd67e8d840..1c3f677d4bf 100644
>> --- a/src/mesa/state_tracker/st_atom_array.c
>> +++ b/src/mesa/state_tracker/st_atom_array.c
>> @@ -230,6 +230,27 @@ static const uint16_t vertex_formats[][4][4] = {
>>   PIPE_FORMAT_R32G32B32A32_FIXED
>>},

[Mesa-dev] [PATCH] RFC gallium: add 64 bit integer formats

2018-04-09 Thread Karol Herbst
The unsigned formats are needed for ARB_bindless_texture 64-bit vertex attribs;
both the signed and unsigned formats are needed for NV_vertex_attrib_integer64.

Fixes the new piglit sampler-vertex-attrib-input-output test I sent some days
ago for bindless_texture.

The change inside vbo_attrtype_to_double_flag is what I am most concerned
about. Maybe I should add another flag for 64-bit ints, or rework what Doubles
means in gl_array_attributes, or rename it to is64Bit and rework all users of
Doubles.

Any suggestions?

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/gallium/drivers/svga/svga_format.c |  8 
 src/gallium/include/pipe/p_format.h|  9 +
 src/mesa/main/glformats.c  |  3 +++
 src/mesa/state_tracker/st_atom_array.c | 30 +++---
 src/mesa/vbo/vbo_private.h |  2 +-
 5 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_format.c 
b/src/gallium/drivers/svga/svga_format.c
index 20a6e6b159f..f01a0e79c72 100644
--- a/src/gallium/drivers/svga/svga_format.c
+++ b/src/gallium/drivers/svga/svga_format.c
@@ -369,6 +369,14 @@ static const struct vgpu10_format_entry 
format_conversion_table[] =
{ PIPE_FORMAT_A1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
{ PIPE_FORMAT_X1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
{ PIPE_FORMAT_A4B4G4R4_UNORM,SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64_UINT,  SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64G64_UINT,   SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64G64B64_UINT,SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64G64B64A64_UINT, SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64_SINT,  SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64G64_SINT,   SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64G64B64_SINT,SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64G64B64A64_SINT, SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
 };
 
 
diff --git a/src/gallium/include/pipe/p_format.h 
b/src/gallium/include/pipe/p_format.h
index 57399800fa4..df698856b70 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -396,6 +396,15 @@ enum pipe_format {
PIPE_FORMAT_X1B5G5R5_UNORM  = 310,
PIPE_FORMAT_A4B4G4R4_UNORM  = 311,
 
+   PIPE_FORMAT_R64_UINT= 312,
+   PIPE_FORMAT_R64G64_UINT = 313,
+   PIPE_FORMAT_R64G64B64_UINT  = 314,
+   PIPE_FORMAT_R64G64B64A64_UINT   = 315,
+   PIPE_FORMAT_R64_SINT= 316,
+   PIPE_FORMAT_R64G64_SINT = 317,
+   PIPE_FORMAT_R64G64B64_SINT  = 318,
+   PIPE_FORMAT_R64G64B64A64_SINT   = 319,
+
PIPE_FORMAT_COUNT
 };
 
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index 1e797c24c2a..feafd97f5ee 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -543,6 +543,9 @@ _mesa_bytes_per_vertex_attrib(GLint comps, GLenum type)
case GL_INT:
case GL_UNSIGNED_INT:
   return comps * sizeof(GLint);
+   /* ARB_bindless_texture */
+   case GL_UNSIGNED_INT64_ARB:
+  return comps * sizeof(GLuint64EXT);
case GL_FLOAT:
   return comps * sizeof(GLfloat);
case GL_HALF_FLOAT_ARB:
diff --git a/src/mesa/state_tracker/st_atom_array.c 
b/src/mesa/state_tracker/st_atom_array.c
index 2fd67e8d840..1c3f677d4bf 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -230,6 +230,27 @@ static const uint16_t vertex_formats[][4][4] = {
  PIPE_FORMAT_R32G32B32A32_FIXED
   },
},
+   {{0}}, /* gap */
+   { /* GL_INT64_ARB */
+  {0},
+  {0},
+  {
+ PIPE_FORMAT_R64_SINT,
+ PIPE_FORMAT_R64G64_SINT,
+ PIPE_FORMAT_R64G64B64_SINT,
+ PIPE_FORMAT_R64G64B64A64_SINT
+  },
+   },
+   { /* GL_UNSIGNED_INT64_ARB */
+  {0},
+  {0},
+  {
+ PIPE_FORMAT_R64_UINT,
+ PIPE_FORMAT_R64G64_UINT,
+ PIPE_FORMAT_R64G64B64_UINT,
+ PIPE_FORMAT_R64G64B64A64_UINT
+  },
+   },
 };
 
 
@@ -244,7 +265,7 @@ st_pipe_vertex_format(const struct gl_array_attributes 
*attrib)
const bool normalized = attrib->Normalized;
const bool integer = attrib->Integer;
GLenum16 type = attrib->Type;
-   unsigned index;
+   unsigned index = integer*2 + normalized;
 
assert(size >= 1 && size <= 4);
assert(format == GL_RGBA || format == GL_BGRA);
@@ -298,11 +319,14 @@ st_pipe_vertex_format(const struct gl_array_attributes 
*attrib)
  return PIPE_FORMAT_B8G8R8A8_UNORM;
   }
   break;
+   case GL_UNSIGNED_INT

Re: [Mesa-dev] [PATCH 1/4] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-04 Thread Karol Herbst
On Wed, Apr 4, 2018 at 2:23 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> I have a very strong feeling that this isn't the only place where
> reading/writing IMAGE and SAMPLER variables is going to cause NIR heartburn.
> For example, we have special cases in nir_validate for SUBROUTINE variables
> and we probably need IMAGE and SAMPLER support everywhere we have SUBROUTINE
> plus some (since you can write to them now as well).
>

yeah. I was just making piglit happy here. I guess I will try to run
it with some games using bindless_textures and fix all the crashes I
encounter there at least. More piglit tests might be useful as well.
Sadly I don't see any bindless_textures tests in the CTS :(

>
> On Tue, Apr 3, 2018 at 6:21 AM, Karol Herbst <kher...@redhat.com> wrote:
>>
>> Signed-off-by: Karol Herbst <kher...@redhat.com>
>> ---
>>  src/compiler/nir/nir_split_var_copies.c | 4 
>>  1 file changed, 4 insertions(+)
>>
>> diff --git a/src/compiler/nir/nir_split_var_copies.c
>> b/src/compiler/nir/nir_split_var_copies.c
>> index bc3ceedbdb8..231a89add4d 100644
>> --- a/src/compiler/nir/nir_split_var_copies.c
>> +++ b/src/compiler/nir/nir_split_var_copies.c
>> @@ -241,6 +241,10 @@ split_var_copies_block(nir_block *block, struct
>> split_var_copies_state *state)
>>  ralloc_steal(state->dead_ctx, instr);
>>   }
>>   break;
>> +  /* for bindless those are uint64 */
>> +  case GLSL_TYPE_IMAGE:
>> +  case GLSL_TYPE_SAMPLER:
>> + assert(src_head->var->data.bindless);
>>case GLSL_TYPE_INT:
>>case GLSL_TYPE_UINT:
>>case GLSL_TYPE_INT16:
>> --
>> 2.14.3
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] nir: add support for bindless_texture samplers

2018-04-04 Thread Karol Herbst
On Wed, Apr 4, 2018 at 2:16 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> On Tue, Apr 3, 2018 at 6:21 AM, Karol Herbst <kher...@redhat.com> wrote:
>>
>> Signed-off-by: Karol Herbst <kher...@redhat.com>
>> ---
>>  src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
>>  src/compiler/nir/nir.h|  1 +
>>  src/compiler/nir/nir_print.c  |  3 +++
>>  3 files changed, 19 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
>> b/src/compiler/glsl/glsl_to_nir.cpp
>> index dbb58d82e8f..8e2d96a2361 100644
>> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
>>  {
>> unsigned num_srcs;
>> nir_texop op;
>> +   bool bindless =
>> ir->sampler->variable_referenced()->contains_bindless();
>> +
>> switch (ir->op) {
>> case ir_tex:
>>op = nir_texop_tex;
>> @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
>>num_srcs++;
>> if (ir->offset != NULL)
>>num_srcs++;
>> +   if (bindless)
>> +  num_srcs=+;
>>
>> nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
>>
>> @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
>>unreachable("not reached");
>> }
>>
>> -   instr->texture = evaluate_deref(&instr->instr, ir->sampler);
>> -
>> unsigned src_number = 0;
>>
>> +   /* for bindless we use the handle src */
>> +   if (bindless) {
>> +  instr->texture = NULL;
>> +  instr->src[src_number].src =
>> + nir_src_for_ssa(evaluate_rvalue(ir->sampler));
>> +  instr->src[src_number].src_type = nir_tex_src_handle;
>> +  src_number++;
>> +   } else {
>> +  instr->texture = evaluate_deref(&instr->instr, ir->sampler);
>> +   }
>> +
>> if (ir->coordinate != NULL) {
>>instr->coord_components = ir->coordinate->type->vector_elements;
>>instr->src[src_number].src =
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index f33049d7134..e4d626d263e 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -1218,6 +1218,7 @@ typedef enum {
>> nir_tex_src_texture_offset, /* < dynamically uniform indirect offset
>> */
>> nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset
>> */
>> nir_tex_src_plane,  /* < selects plane for planar textures */
>> +   nir_tex_src_handle, /* < handle for bindless samples */
>
>
> Do we want to have separate texture and sampler handles?  We don't care for
> GL but I kind-of think we will for Vulkan.
>

Don't know. I have never looked into Vulkan yet. I could rename it to
sampler_handle for now and we can add the texture handle later for
Vulkan?

>>
>> nir_num_tex_src_types
>>  } nir_tex_src_type;
>>
>> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
>> index 21f13097651..c9431555f2f 100644
>> --- a/src/compiler/nir/nir_print.c
>> +++ b/src/compiler/nir/nir_print.c
>> @@ -778,6 +778,9 @@ print_tex_instr(nir_tex_instr *instr, print_state
>> *state)
>>case nir_tex_src_plane:
>>   fprintf(fp, "(plane)");
>>   break;
>> +  case nir_tex_src_handle:
>> + fprintf(fp, "(handle)");
>> + break;
>>
>>default:
>>   unreachable("Invalid texture source type");
>> --
>> 2.14.3
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] nir: add support for bindless_texture samplers

2018-04-03 Thread Karol Herbst
On Tue, Apr 3, 2018 at 3:21 PM, Karol Herbst <kher...@redhat.com> wrote:
> Signed-off-by: Karol Herbst <kher...@redhat.com>
> ---
>  src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
>  src/compiler/nir/nir.h|  1 +
>  src/compiler/nir/nir_print.c  |  3 +++
>  3 files changed, 19 insertions(+), 2 deletions(-)
>
> diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
> b/src/compiler/glsl/glsl_to_nir.cpp
> index dbb58d82e8f..8e2d96a2361 100644
> --- a/src/compiler/glsl/glsl_to_nir.cpp
> +++ b/src/compiler/glsl/glsl_to_nir.cpp
> @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
>  {
> unsigned num_srcs;
> nir_texop op;
> +   bool bindless = ir->sampler->variable_referenced()->contains_bindless();
> +
> switch (ir->op) {
> case ir_tex:
>op = nir_texop_tex;
> @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
>num_srcs++;
> if (ir->offset != NULL)
>num_srcs++;
> +   if (bindless)
> +  num_srcs=+;

small typo here, should have been "num_srcs++" instead.

>
> nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
>
> @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
>unreachable("not reached");
> }
>
> -   instr->texture = evaluate_deref(&instr->instr, ir->sampler);
> -
> unsigned src_number = 0;
>
> +   /* for bindless we use the handle src */
> +   if (bindless) {
> +  instr->texture = NULL;
> +  instr->src[src_number].src =
> + nir_src_for_ssa(evaluate_rvalue(ir->sampler));
> +  instr->src[src_number].src_type = nir_tex_src_handle;
> +  src_number++;
> +   } else {
> +  instr->texture = evaluate_deref(&instr->instr, ir->sampler);
> +   }
> +
> if (ir->coordinate != NULL) {
>instr->coord_components = ir->coordinate->type->vector_elements;
>instr->src[src_number].src =
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index f33049d7134..e4d626d263e 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1218,6 +1218,7 @@ typedef enum {
> nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
> nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
> nir_tex_src_plane,  /* < selects plane for planar textures */
> +   nir_tex_src_handle, /* < handle for bindless samples */
> nir_num_tex_src_types
>  } nir_tex_src_type;
>
> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
> index 21f13097651..c9431555f2f 100644
> --- a/src/compiler/nir/nir_print.c
> +++ b/src/compiler/nir/nir_print.c
> @@ -778,6 +778,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
>case nir_tex_src_plane:
>   fprintf(fp, "(plane)");
>   break;
> +  case nir_tex_src_handle:
> + fprintf(fp, "(handle)");
> + break;
>
>default:
>   unreachable("Invalid texture source type");
> --
> 2.14.3
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-03 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/nir/nir_split_var_copies.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/compiler/nir/nir_split_var_copies.c 
b/src/compiler/nir/nir_split_var_copies.c
index bc3ceedbdb8..231a89add4d 100644
--- a/src/compiler/nir/nir_split_var_copies.c
+++ b/src/compiler/nir/nir_split_var_copies.c
@@ -241,6 +241,10 @@ split_var_copies_block(nir_block *block, struct 
split_var_copies_state *state)
 ralloc_steal(state->dead_ctx, instr);
  }
  break;
+  /* for bindless those are uint64 */
+  case GLSL_TYPE_IMAGE:
+  case GLSL_TYPE_SAMPLER:
+ assert(src_head->var->data.bindless);
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT16:
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] glsl/nir: fix variable type for image intrinsics and ubos

2018-04-03 Thread Karol Herbst
If the bindless image is passed through a struct we ended up getting the
glsl_type of the struct, not the image.

variable_referenced points to the declaration of the struct, so it won't work
for bindless images. So just drop it.

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/glsl/glsl_to_nir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 8e2d96a2361..1fc0cac4736 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -893,7 +893,7 @@ nir_visitor::visit(ir_call *ir)
  exec_node *param = ir->actual_parameters.get_head();
  ir_dereference *image = (ir_dereference *)param;
  const glsl_type *type =
-image->variable_referenced()->type->without_array();
+image->type->without_array();
 
  instr->variables[0] = evaluate_deref(&instr->instr, image);
  param = param->get_next();
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/4] nir: add support for bindless_texture

2018-04-03 Thread Karol Herbst
I think most of the changes are straightforward. The changes needed for images
should be discussed, because in its current form it would require changing all
drivers using nir and supporting images.

Karol Herbst (4):
  nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars
  nir: add support for bindless_texture samplers
  glsl/nir: fix variable type for image intrinsics and ubos
  RFC nir: add support for bindless_texture images

 src/compiler/glsl/glsl_to_nir.cpp   | 38 -
 src/compiler/nir/nir.h  |  3 ++-
 src/compiler/nir/nir_intrinsics.py  | 24 ++---
 src/compiler/nir/nir_print.c|  3 +++
 src/compiler/nir/nir_split_var_copies.c |  4 
 5 files changed, 54 insertions(+), 18 deletions(-)

-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] RFC nir: add support for bindless_texture images

2018-04-03 Thread Karol Herbst
I added another source for all image_var_* intrinsics. Drivers have to be
adjusted with this change.

There was some discussion to add new intrinsics to handle operations on
bindless images. Maybe we can continue with this here?

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/glsl/glsl_to_nir.cpp  | 19 +--
 src/compiler/nir/nir.h |  2 +-
 src/compiler/nir/nir_intrinsics.py | 24 
 3 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 1fc0cac4736..4e053c140c2 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -894,10 +894,14 @@ nir_visitor::visit(ir_call *ir)
  ir_dereference *image = (ir_dereference *)param;
  const glsl_type *type =
 image->type->without_array();
+ bool bindless = image->variable_referenced()->contains_bindless();
 
  instr->variables[0] = evaluate_deref(&instr->instr, image);
  param = param->get_next();
 
+ if (bindless)
+instr->variables[0]->var->data.bindless = true;
+
  /* Set the intrinsic destination. */
  if (ir->return_deref) {
 unsigned num_components = ir->return_deref->type->vector_elements;
@@ -909,6 +913,11 @@ nir_visitor::visit(ir_call *ir)
 
  if (op == nir_intrinsic_image_var_size ||
  op == nir_intrinsic_image_var_samples) {
+if (bindless) {
+   instr->src[0] = nir_src_for_ssa(evaluate_rvalue(image));
+} else {
+   instr->src[0] = nir_src_for_ssa(&instr_undef->def);
+}
 nir_builder_instr_insert(&b, &instr->instr);
 break;
  }
@@ -941,15 +950,21 @@ nir_visitor::visit(ir_call *ir)
 instr->src[1] = nir_src_for_ssa(&instr_undef->def);
  }
 
+ if (bindless) {
+instr->src[2] = nir_src_for_ssa(evaluate_rvalue(image));
+ } else {
+instr->src[2] = nir_src_for_ssa(&instr_undef->def);
+ }
+
  /* Set the intrinsic parameters. */
  if (!param->is_tail_sentinel()) {
-instr->src[2] =
+instr->src[3] =
nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
 param = param->get_next();
  }
 
  if (!param->is_tail_sentinel()) {
-instr->src[3] =
+instr->src[4] =
nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
 param = param->get_next();
  }
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index e4d626d263e..c6081cbb61f 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1108,7 +1108,7 @@ typedef enum {
 
 } nir_intrinsic_index_flag;
 
-#define NIR_INTRINSIC_MAX_INPUTS 4
+#define NIR_INTRINSIC_MAX_INPUTS 5
 
 typedef struct {
const char *name;
diff --git a/src/compiler/nir/nir_intrinsics.py 
b/src/compiler/nir/nir_intrinsics.py
index 1bc99552cd7..d6da63ab769 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -291,19 +291,19 @@ atomic3("atomic_counter_comp_swap")
 # argument with the value to be written, and image atomic operations take
 # either one or two additional scalar arguments with the same meaning as in
 # the ARB_shader_image_load_store specification.
-intrinsic("image_var_load", src_comp=[4, 1], dest_comp=4, num_vars=1,
+intrinsic("image_var_load", src_comp=[4, 1, 1], dest_comp=4, num_vars=1,
   flags=[CAN_ELIMINATE])
-intrinsic("image_var_store", src_comp=[4, 1, 4], num_vars=1)
-intrinsic("image_var_atomic_add",  src_comp=[4, 1, 1], dest_comp=1, num_vars=1)
-intrinsic("image_var_atomic_min",  src_comp=[4, 1, 1], dest_comp=1, num_vars=1)
-intrinsic("image_var_atomic_max",  src_comp=[4, 1, 1], dest_comp=1, num_vars=1)
-intrinsic("image_var_atomic_and",  src_comp=[4, 1, 1], dest_comp=1, num_vars=1)
-intrinsic("image_var_atomic_or",   src_comp=[4, 1, 1], dest_comp=1, num_vars=1)
-intrinsic("image_var_atomic_xor",  src_comp=[4, 1, 1], dest_comp=1, num_vars=1)
-intrinsic("image_var_atomic_exchange",  src_comp=[4, 1, 1], dest_comp=1, 
num_vars=1)
-intrinsic("image_var_atomic_comp_swap", src_comp=[4, 1, 1, 1], dest_comp=1, 
num_vars=1)
-intrinsic("image_var_size",dest_comp=0, num_vars=1, flags=[CAN_ELIMINATE, 
CAN_REORDER])
-intrinsic("image_var_samples", dest_comp=1, num_vars=1, flags=[CAN_ELIMINATE, 
CAN_REORDER])
+intrinsic("image_var_store", src_comp=[4, 1, 1, 4], num_vars=1)
+intrinsic("image_var_atomic_add",  src_comp=[4, 1, 1, 1], dest_comp=1, 
num_vars=1)
+intrinsic("image_var_atomic_min",  src_comp=[4, 1, 1, 1], dest_comp=1, 
num_vars

[Mesa-dev] [PATCH 2/4] nir: add support for bindless_texture samplers

2018-04-03 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
 src/compiler/nir/nir.h|  1 +
 src/compiler/nir/nir_print.c  |  3 +++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index dbb58d82e8f..8e2d96a2361 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
 {
unsigned num_srcs;
nir_texop op;
+   bool bindless = ir->sampler->variable_referenced()->contains_bindless();
+
switch (ir->op) {
case ir_tex:
   op = nir_texop_tex;
@@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
   num_srcs++;
if (ir->offset != NULL)
   num_srcs++;
+   if (bindless)
+  num_srcs=+;
 
nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
 
@@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
   unreachable("not reached");
}
 
-   instr->texture = evaluate_deref(&instr->instr, ir->sampler);
-
unsigned src_number = 0;
 
+   /* for bindless we use the handle src */
+   if (bindless) {
+  instr->texture = NULL;
+  instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->sampler));
+  instr->src[src_number].src_type = nir_tex_src_handle;
+  src_number++;
+   } else {
+  instr->texture = evaluate_deref(&instr->instr, ir->sampler);
+   }
+
if (ir->coordinate != NULL) {
   instr->coord_components = ir->coordinate->type->vector_elements;
   instr->src[src_number].src =
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f33049d7134..e4d626d263e 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1218,6 +1218,7 @@ typedef enum {
nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
nir_tex_src_plane,  /* < selects plane for planar textures */
+   nir_tex_src_handle, /* < handle for bindless samples */
nir_num_tex_src_types
 } nir_tex_src_type;
 
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 21f13097651..c9431555f2f 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -778,6 +778,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
   case nir_tex_src_plane:
  fprintf(fp, "(plane)");
  break;
+  case nir_tex_src_handle:
+ fprintf(fp, "(handle)");
+ break;
 
   default:
  unreachable("Invalid texture source type");
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: add support for 4 constant offsets in tg4

2018-03-30 Thread Karol Herbst
On Fri, Mar 30, 2018 at 9:35 PM, Eric Anholt <e...@anholt.net> wrote:
> Karol Herbst <kher...@redhat.com> writes:
>
>> Nvidia hardware can do that natively so there is no need to lower that to
>> four TG4 instructions.
>>
>> Signed-off-by: Karol Herbst <kher...@redhat.com>
>> ---
>>  src/compiler/glsl/glsl_to_nir.cpp | 25 ++---
>>  src/compiler/nir/nir.h|  9 -
>>  src/compiler/nir/nir_print.c  |  9 +
>>  3 files changed, 35 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
>> b/src/compiler/glsl/glsl_to_nir.cpp
>> index c4a6d52a5b2..4ea5f1616a7 100644
>> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> @@ -2042,7 +2042,9 @@ nir_visitor::visit(ir_texture *ir)
>>num_srcs++;
>> if (ir->shadow_comparator != NULL)
>>num_srcs++;
>> -   if (ir->offset != NULL)
>> +   if (ir->offset != NULL && ir->offset->type->is_array())
>> +  num_srcs += ir->offset->type->array_size();
>> +   else if (ir->offset != NULL)
>>num_srcs++;
>>
>> nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
>> @@ -2097,12 +2099,21 @@ nir_visitor::visit(ir_texture *ir)
>>
>> if (ir->offset != NULL) {
>>/* we don't support multiple offsets yet */
>> -  assert(ir->offset->type->is_vector() || 
>> ir->offset->type->is_scalar());
>> -
>> -  instr->src[src_number].src =
>> - nir_src_for_ssa(evaluate_rvalue(ir->offset));
>> -  instr->src[src_number].src_type = nir_tex_src_offset;
>> -  src_number++;
>> +  if (ir->offset->type->is_vector() || ir->offset->type->is_scalar()) {
>> + instr->src[src_number].src =
>> +nir_src_for_ssa(evaluate_rvalue(ir->offset));
>> + instr->src[src_number].src_type = nir_tex_src_offset;
>> + src_number++;
>> +  } else if (ir->offset->type->is_array()) {
>> + for (int i = 0; i < ir->offset->type->array_size(); i++) {
>> +instr->src[src_number].src =
>> +   
>> nir_src_for_ssa(evaluate_rvalue(ir->offset->as_constant()->get_array_element(i)->as_rvalue()));
>> +instr->src[src_number].src_type = 
>> (nir_tex_src_type)(nir_tex_src_offset + i);
>> +src_number++;
>> + }
>> +  } else {
>> + assert(false);
>
> Maybe just do assert(ir->offset->type->is_array()) in the previous block
> instead of the extra else.  And optionally pull
> ir->offset->as_constant() out to a temporary for nicer column wrapping.
> Other than that, this seems good.
>

Well, the thing is, it only works with constants within the array. If
you have non-constant values the code wouldn't assert on that. But I
will try to think of something nice there.

> Reviewed-by: Eric Anholt <e...@anholt.net>
>
> If I'm reading my specs right, I'll be able to use this on vc6, too.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir: add support for 4 constant offsets in tg4

2018-03-29 Thread Karol Herbst
Nvidia hardware can do that natively so there is no need to lower that to four
TG4 instructions.

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/glsl/glsl_to_nir.cpp | 25 ++---
 src/compiler/nir/nir.h|  9 -
 src/compiler/nir/nir_print.c  |  9 +
 3 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index c4a6d52a5b2..4ea5f1616a7 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -2042,7 +2042,9 @@ nir_visitor::visit(ir_texture *ir)
   num_srcs++;
if (ir->shadow_comparator != NULL)
   num_srcs++;
-   if (ir->offset != NULL)
+   if (ir->offset != NULL && ir->offset->type->is_array())
+  num_srcs += ir->offset->type->array_size();
+   else if (ir->offset != NULL)
   num_srcs++;
 
nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
@@ -2097,12 +2099,21 @@ nir_visitor::visit(ir_texture *ir)
 
if (ir->offset != NULL) {
   /* we don't support multiple offsets yet */
-  assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());
-
-  instr->src[src_number].src =
- nir_src_for_ssa(evaluate_rvalue(ir->offset));
-  instr->src[src_number].src_type = nir_tex_src_offset;
-  src_number++;
+  if (ir->offset->type->is_vector() || ir->offset->type->is_scalar()) {
+ instr->src[src_number].src =
+nir_src_for_ssa(evaluate_rvalue(ir->offset));
+ instr->src[src_number].src_type = nir_tex_src_offset;
+ src_number++;
+  } else if (ir->offset->type->is_array()) {
+ for (int i = 0; i < ir->offset->type->array_size(); i++) {
+instr->src[src_number].src =
+   
nir_src_for_ssa(evaluate_rvalue(ir->offset->as_constant()->get_array_element(i)->as_rvalue()));
+instr->src[src_number].src_type = 
(nir_tex_src_type)(nir_tex_src_offset + i);
+src_number++;
+ }
+  } else {
+ assert(false);
+  }
}
 
switch (ir->op) {
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9fff1f4647d..7b02c4af05f 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1175,6 +1175,9 @@ typedef enum {
nir_tex_src_projector,
nir_tex_src_comparator, /* shadow comparator */
nir_tex_src_offset,
+   nir_tex_src_offset1,
+   nir_tex_src_offset2,
+   nir_tex_src_offset3,
nir_tex_src_bias,
nir_tex_src_lod,
nir_tex_src_ms_index, /* MSAA sample index */
@@ -1377,6 +1380,9 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, 
unsigned src)
   return nir_type_float;
 
case nir_tex_src_offset:
+   case nir_tex_src_offset1:
+   case nir_tex_src_offset2:
+   case nir_tex_src_offset3:
case nir_tex_src_ms_index:
case nir_tex_src_texture_offset:
case nir_tex_src_sampler_offset:
@@ -1408,7 +1414,8 @@ nir_tex_instr_src_size(const nir_tex_instr *instr, 
unsigned src)
/* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for
 * the offset, since a cube maps to a single face.
 */
-   if (instr->src[src].src_type == nir_tex_src_offset) {
+   if (instr->src[src].src_type >= nir_tex_src_offset &&
+   instr->src[src].src_type <= nir_tex_src_offset3) {
   if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
  return 2;
   else if (instr->is_array)
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 21f13097651..e13a4f9aa6d 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -751,6 +751,15 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
   case nir_tex_src_offset:
  fprintf(fp, "(offset)");
  break;
+  case nir_tex_src_offset1:
+ fprintf(fp, "(offset1)");
+ break;
+  case nir_tex_src_offset2:
+ fprintf(fp, "(offset2)");
+ break;
+  case nir_tex_src_offset3:
+ fprintf(fp, "(offset3)");
+ break;
   case nir_tex_src_bias:
  fprintf(fp, "(bias)");
  break;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0: drop image binding from BGR10A2 format

2018-03-29 Thread Karol Herbst
Did a CTS run on that. Things are looking better with it. No regressions.

Tested-By: Karol Herbst <kher...@redhat.com>

On Thu, Mar 29, 2018 at 5:47 AM, Ilia Mirkin <imir...@alum.mit.edu> wrote:
> Fixes a bunch of new CTS pbo tests that use that as an output format,
> which the state tracker converts into buffer image writes.
>
> No part of the driver is ready for BGR10A2. It could probably be enabled
> on Maxwell+, but seems unnecessary.
>
> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
> ---
>  src/gallium/drivers/nouveau/nv50/nv50_formats.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
> index 0ead8ac2e1e..9f8faf768dd 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
> @@ -154,7 +154,7 @@ const struct nv50_format 
> nv50_format_table[PIPE_FORMAT_COUNT] =
>
> C4(A, R10G10B10A2_UNORM, RGB10_A2_UNORM, R, G, B, A, UNORM, A2B10G10R10, 
> TD),
> F3(A, R10G10B10X2_UNORM, RGB10_A2_UNORM, R, G, B, xx, UNORM, A2B10G10R10, 
> T),
> -   C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, 
> IB),
> +   C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, 
> TB),
> F3(A, B10G10R10X2_UNORM, BGR10_A2_UNORM, B, G, R, xx, UNORM, A2B10G10R10, 
> T),
> C4(A, R10G10B10A2_SNORM, NONE, R, G, B, A, SNORM, A2B10G10R10, T),
> C4(A, B10G10R10A2_SNORM, NONE, B, G, R, A, SNORM, A2B10G10R10, T),
> --
> 2.16.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0/ir: fix INTERP_* with indirect inputs

2018-03-27 Thread Karol Herbst
Reviewed-by: Karol Herbst <kher...@redhat.com>

On Sat, Mar 24, 2018 at 8:19 PM, Ilia Mirkin <imir...@alum.mit.edu> wrote:
> There were two problems, both of which are fixed now:
>  - The indirect address was not being shifted by 4
>  - The indirect address was being placed as an argument in the offset case
>
> This fixes some of the new interpolateAt* piglits which now test for
> these situations.
>
> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 7 ---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index 09b5228127a..3c5bad05fe7 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -3083,10 +3083,11 @@ Converter::handleINTERP(Value *dst[4])
>   assert(sym[c]);
>   op = insn->op;
>   mode = insn->ipa;
> + ptr = insn->getIndirect(0, 0);
>}
> } else {
>if (src.isIndirect(0))
> - ptr = fetchSrc(src.getIndirect(0), 0, NULL);
> + ptr = shiftAddress(fetchSrc(src.getIndirect(0), 0, NULL));
>
>// We can assume that the fixed index will point to an input of the 
> same
>// interpolation type in case of an indirect.
> @@ -3144,10 +3145,10 @@ Converter::handleINTERP(Value *dst[4])
>insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c));
>if (op == OP_PINTERP)
>   insn->setSrc(1, w);
> -  if (ptr)
> - insn->setIndirect(0, 0, ptr);
>if (offset)
>   insn->setSrc(op == OP_PINTERP ? 2 : 1, offset);
> +  if (ptr)
> + insn->setIndirect(0, 0, ptr);
>
>insn->setInterpolate(mode);
> }
> --
> 2.16.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ir: add more advanced slct constant folding code

2018-03-27 Thread Karol Herbst
From: Karol Herbst <karolher...@gmail.com>

shader-db changes:
total instructions in shared programs : 5894114 -> 5887031 (-0.12%)
total gprs used in shared programs: 666558 -> 666514 (-0.01%)
total shared used in shared programs  : 520416 -> 520416 (0.00%)
total local used in shared programs   : 53524 -> 53572 (0.09%)
total bytes used in shared programs   : 54006744 -> 53942072 (-0.12%)

        local  shared  gpr  inst  bytes
helped      3       0   36   936    936
  hurt     10       0    5     0      0

increase in local use is related to a bug in the spilling code

Signed-off-by: Karol Herbst <karolher...@gmail.com>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 32 +++---
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 48cf74950df..18d5456b8fd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -636,11 +636,35 @@ ConstantFolding::expr(Instruction *i,
  return;
   }
   break;
-   case OP_SLCT:
-  if (a->data.u32 != b->data.u32)
+   case OP_SLCT: {
+  CmpInstruction *slct = i->asCmp();
+  // slct(a, a, c) -> a
+  if (a->data.u32 == b->data.u32) {
+ res.data.u32 = a->data.u32;
+ break;
+  }
+  // slct(-1, 0, c) -> set(c, 0)
+  if (a->data.u32 == 0x &&
+  b->data.u32 == 0x0) {
+ i->op = OP_SET;
+ i->setSrc(0, i->getSrc(2));
+ i->setSrc(2, NULL);
+ i->dType = TYPE_U32;
  return;
-  res.data.u32 = a->data.u32;
-  break;
+  }
+  // slct(0, -1, c) -> !set(c, 0)
+  if (a->data.u32 == 0x0 &&
+  b->data.u32 == 0x) {
+ i->op = OP_SET;
+ i->swapSources(0, 1);
+ i->setSrc(0, i->getSrc(2));
+ i->setSrc(2, NULL);
+ i->dType = TYPE_U32;
+ slct->setCondition(inverseCondCode(slct->getCondition()));
+ return;
+  }
+  return;
+   }
case OP_EXTBF: {
   int offset = b->data.u32 & 0xff;
   int width = (b->data.u32 >> 8) & 0xff;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: optimise slct(t, f, set) to mov(set) or not(set)

2018-03-27 Thread Karol Herbst
just noticed I sent out the wrong version of that patch...

On Tue, Mar 27, 2018 at 10:50 PM, Karol Herbst <kher...@redhat.com> wrote:
> From: Karol Herbst <karolher...@gmail.com>
>
> helps mainly Feral-ported games
>
> changes in shader-db:
> total instructions in shared programs : 3940749 -> 3935015 (-0.15%)
> total gprs used in shared programs: 481460 -> 481433 (-0.01%)
> total local used in shared programs   : 27481 -> 27513 (0.12%)
> total bytes used in shared programs   : 36115776 -> 36063344 (-0.15%)
>
> localgpr   inst  bytes
> helped   6  31 854 854
>   hurt  10   5   1   1
> ---
>  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 26 
> --
>  1 file changed, 24 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 48cf74950df..1e3dea95494 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -637,9 +637,31 @@ ConstantFolding::expr(Instruction *i,
>}
>break;
> case OP_SLCT:
> -  if (a->data.u32 != b->data.u32)
> +  // slct(a, a, b) -> a
> +  if (a->data.u32 == b->data.u32) {
> + res.data.u32 = a->data.u32;
> +  } else {
> + // slct_ne(true, false, bool) -> !bool
> + CmpInstruction *slct = i->asCmp();
> + Instruction *set = i->getSrc(2)->getInsn();
> + if (!set || set->op != OP_SET)
> +return;
> + if (isFloatType(set->dType))
> +return;
> + if ((slct->getCondition() == CC_NE && imm0.isInteger(-1) && 
> imm1.isInteger(0)) ||
> + (slct->getCondition() == CC_EQ && imm0.isInteger(0) && 
> imm1.isInteger(-1))) {
> +bld.setPosition(i, false);
> +bld.mkOp1(OP_MOV, i->dType, i->getDef(0), i->getSrc(2));
> +delete_Instruction(prog, i);
> + } else if (
> + (slct->getCondition() == CC_EQ && imm0.isInteger(-1) && 
> imm1.isInteger(0)) ||
> + (slct->getCondition() == CC_NE && imm0.isInteger(0) && 
> imm1.isInteger(-1))) {
> +bld.setPosition(i, false);
> +bld.mkOp1(OP_NOT, i->dType, i->getDef(0), i->getSrc(2));
> +delete_Instruction(prog, i);
> + }
>   return;
> -  res.data.u32 = a->data.u32;
> +  }
>break;
> case OP_EXTBF: {
>int offset = b->data.u32 & 0xff;
> --
> 2.14.3
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: optimise slct(t, f, set) to mov(set) or not(set)

2018-03-27 Thread Karol Herbst
On Tue, Mar 27, 2018 at 11:04 PM, Ilia Mirkin <imir...@alum.mit.edu> wrote:
> On Tue, Mar 27, 2018 at 4:50 PM, Karol Herbst <kher...@redhat.com> wrote:
>> From: Karol Herbst <karolher...@gmail.com>
>>
>> helps mainly Feral-ported games
>>
>> changes in shader-db:
>> total instructions in shared programs : 3940749 -> 3935015 (-0.15%)
>> total gprs used in shared programs: 481460 -> 481433 (-0.01%)
>> total local used in shared programs   : 27481 -> 27513 (0.12%)
>> total bytes used in shared programs   : 36115776 -> 36063344 (-0.15%)
>>
>> localgpr   inst  bytes
>> helped   6  31 854 854
>>   hurt  10   5   1   1
>
> Can you look at the local memory regressions and see what happened?
> Seems like local went up a lot.
>

well yeah, it only happened in shaders which were already spilling and
we ended up with spilling silliness again where a spilled value was
loaded from lmem although we could have used the registers in that
BB.

>> ---
>>  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 26 
>> --
>>  1 file changed, 24 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> index 48cf74950df..1e3dea95494 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> @@ -637,9 +637,31 @@ ConstantFolding::expr(Instruction *i,
>>}
>>break;
>> case OP_SLCT:
>> -  if (a->data.u32 != b->data.u32)
>> +  // slct(a, a, b) -> a
>> +  if (a->data.u32 == b->data.u32) {
>> + res.data.u32 = a->data.u32;
>> +  } else {
>> + // slct_ne(true, false, bool) -> !bool
>> + CmpInstruction *slct = i->asCmp();
>> + Instruction *set = i->getSrc(2)->getInsn();
>> + if (!set || set->op != OP_SET)
>> +return;
>> + if (isFloatType(set->dType))
>> +return;
>> + if ((slct->getCondition() == CC_NE && imm0.isInteger(-1) && 
>> imm1.isInteger(0)) ||
>> + (slct->getCondition() == CC_EQ && imm0.isInteger(0) && 
>> imm1.isInteger(-1))) {
>> +bld.setPosition(i, false);
>> +bld.mkOp1(OP_MOV, i->dType, i->getDef(0), i->getSrc(2));
>> +delete_Instruction(prog, i);
>> + } else if (
>> + (slct->getCondition() == CC_EQ && imm0.isInteger(-1) && 
>> imm1.isInteger(0)) ||
>> + (slct->getCondition() == CC_NE && imm0.isInteger(0) && 
>> imm1.isInteger(-1))) {
>> +bld.setPosition(i, false);
>> +bld.mkOp1(OP_NOT, i->dType, i->getDef(0), i->getSrc(2));
>
> dType should always be U32 for MOV and NOT.
>
>> +delete_Instruction(prog, i);
>> + }
>
> Don't forget to indicate that you made progress (if you did).
>
>>   return;
>> -  res.data.u32 = a->data.u32;
>> +  }
>>break;
>> case OP_EXTBF: {
>>int offset = b->data.u32 & 0xff;
>> --
>> 2.14.3
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: optimise slct(t, f, set) to mov(set) or not(set)

2018-03-27 Thread Karol Herbst
On Tue, Mar 27, 2018 at 11:19 PM, Ian Romanick <i...@freedesktop.org> wrote:
> It will be interesting to see if this still occurs after nouveau
> finishes switching to NIR.  There's a pattern in nir_opt_algebraic for this.
>

well there is no plan to switch to NIR for everything where we can use TGSI.

> On 03/27/2018 01:50 PM, Karol Herbst wrote:
>> From: Karol Herbst <karolher...@gmail.com>
>>
>> helps mainly Feral-ported games
>>
>> changes in shader-db:
>> total instructions in shared programs : 3940749 -> 3935015 (-0.15%)
>> total gprs used in shared programs: 481460 -> 481433 (-0.01%)
>> total local used in shared programs   : 27481 -> 27513 (0.12%)
>> total bytes used in shared programs   : 36115776 -> 36063344 (-0.15%)
>>
>> localgpr   inst  bytes
>> helped   6  31 854 854
>>   hurt  10   5   1   1
>> ---
>>  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 26 
>> --
>>  1 file changed, 24 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> index 48cf74950df..1e3dea95494 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> @@ -637,9 +637,31 @@ ConstantFolding::expr(Instruction *i,
>>}
>>break;
>> case OP_SLCT:
>> -  if (a->data.u32 != b->data.u32)
>> +  // slct(a, a, b) -> a
>> +  if (a->data.u32 == b->data.u32) {
>> + res.data.u32 = a->data.u32;
>> +  } else {
>> + // slct_ne(true, false, bool) -> !bool
>> + CmpInstruction *slct = i->asCmp();
>> + Instruction *set = i->getSrc(2)->getInsn();
>> + if (!set || set->op != OP_SET)
>> +return;
>> + if (isFloatType(set->dType))
>> +return;
>> + if ((slct->getCondition() == CC_NE && imm0.isInteger(-1) && 
>> imm1.isInteger(0)) ||
>> + (slct->getCondition() == CC_EQ && imm0.isInteger(0) && 
>> imm1.isInteger(-1))) {
>> +bld.setPosition(i, false);
>> +bld.mkOp1(OP_MOV, i->dType, i->getDef(0), i->getSrc(2));
>> +delete_Instruction(prog, i);
>> + } else if (
>> + (slct->getCondition() == CC_EQ && imm0.isInteger(-1) && 
>> imm1.isInteger(0)) ||
>> + (slct->getCondition() == CC_NE && imm0.isInteger(0) && 
>> imm1.isInteger(-1))) {
>> +bld.setPosition(i, false);
>> +bld.mkOp1(OP_NOT, i->dType, i->getDef(0), i->getSrc(2));
>> +delete_Instruction(prog, i);
>> + }
>>   return;
>> -  res.data.u32 = a->data.u32;
>> +  }
>>break;
>> case OP_EXTBF: {
>>int offset = b->data.u32 & 0xff;
>>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ir: optimise slct(t, f, set) to mov(set) or not(set)

2018-03-27 Thread Karol Herbst
From: Karol Herbst <karolher...@gmail.com>

helps mainly Feral-ported games

changes in shader-db:
total instructions in shared programs : 3940749 -> 3935015 (-0.15%)
total gprs used in shared programs: 481460 -> 481433 (-0.01%)
total local used in shared programs   : 27481 -> 27513 (0.12%)
total bytes used in shared programs   : 36115776 -> 36063344 (-0.15%)

localgpr   inst  bytes
helped   6  31 854 854
  hurt  10   5   1   1
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 26 --
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 48cf74950df..1e3dea95494 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -637,9 +637,31 @@ ConstantFolding::expr(Instruction *i,
   }
   break;
case OP_SLCT:
-  if (a->data.u32 != b->data.u32)
+  // slct(a, a, b) -> a
+  if (a->data.u32 == b->data.u32) {
+ res.data.u32 = a->data.u32;
+  } else {
+ // slct_ne(true, false, bool) -> !bool
+ CmpInstruction *slct = i->asCmp();
+ Instruction *set = i->getSrc(2)->getInsn();
+ if (!set || set->op != OP_SET)
+return;
+ if (isFloatType(set->dType))
+return;
+ if ((slct->getCondition() == CC_NE && imm0.isInteger(-1) && 
imm1.isInteger(0)) ||
+ (slct->getCondition() == CC_EQ && imm0.isInteger(0) && 
imm1.isInteger(-1))) {
+bld.setPosition(i, false);
+bld.mkOp1(OP_MOV, i->dType, i->getDef(0), i->getSrc(2));
+delete_Instruction(prog, i);
+ } else if (
+ (slct->getCondition() == CC_EQ && imm0.isInteger(-1) && 
imm1.isInteger(0)) ||
+ (slct->getCondition() == CC_NE && imm0.isInteger(0) && 
imm1.isInteger(-1))) {
+bld.setPosition(i, false);
+bld.mkOp1(OP_NOT, i->dType, i->getDef(0), i->getSrc(2));
+delete_Instruction(prog, i);
+ }
  return;
-  res.data.u32 = a->data.u32;
+  }
   break;
case OP_EXTBF: {
   int offset = b->data.u32 & 0xff;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ra: prefer def == src2 for mad/sad with immediates on nvc0

2018-03-27 Thread Karol Herbst
From: Karol Herbst <karolher...@gmail.com>

This helps with the PostRALoadPropagation pass moving long immediates into
FMA/MAD instructions.

changes in shader-db:
total instructions in shared programs : 5894114 -> 5886074 (-0.14%)
total gprs used in shared programs: 666558 -> 666563 (0.00%)
total shared used in shared programs  : 520416 -> 520416 (0.00%)
total local used in shared programs   : 53524 -> 53524 (0.00%)
total bytes used in shared programs   : 54006744 -> 53932472 (-0.14%)

local sharedgpr   inst  bytes
helped   0   0   241924192
  hurt   0   0   7   9   9

Signed-off-by: Karol Herbst <karolher...@gmail.com>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 30 ++
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 3a0e56e1385..aeaf1ebe8f0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1466,17 +1466,27 @@ GCRA::allocateRegisters(ArrayList& insns)
  nodes[i].init(regs, lval);
  RIG.insert([i]);
 
- if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL &&
- prog->getTarget()->getChipset() < 0xc0) {
+ if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL) {
 Instruction *insn = lval->getInsn();
-if (insn->op == OP_MAD || insn->op == OP_FMA || insn->op == OP_SAD)
-   // Short encoding only possible if they're all GPRs, no need to
-   // affect them otherwise.
-   if (insn->flagsDef < 0 &&
-   insn->src(0).getFile() == FILE_GPR &&
-   insn->src(1).getFile() == FILE_GPR &&
-   insn->src(2).getFile() == FILE_GPR)
-  
nodes[i].addRegPreference(getNode(insn->getSrc(2)->asLValue()));
+if (insn->op != OP_MAD && insn->op != OP_FMA && insn->op != OP_SAD)
+   continue;
+// Short encoding or load propagate immediates only possible if
+// they're all GPRs, no need to affect them otherwise.
+if (insn->flagsDef >= 0 ||
+insn->src(0).getFile() != FILE_GPR ||
+insn->src(1).getFile() != FILE_GPR ||
+insn->src(2).getFile() != FILE_GPR)
+   continue;
+// for nvc0+ we can loadpropagate limms only if we have
+// dest == src2 reg id. Using getImmediate here is fine because
+// we only set a reg preference and leave the immediate alone.
+ImmediateValue imm;
+if (prog->getTarget()->getChipset() >= 0xc0 &&
+!insn->src(0).getImmediate(imm) &&
+!insn->src(1).getImmediate(imm))
+   continue;
+
+nodes[i].addRegPreference(getNode(insn->getSrc(2)->asLValue()));
  }
   }
}
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvc0/ir: fix emitting NOTs with predicates

2018-03-27 Thread Karol Herbst
From: Karol Herbst <karolher...@gmail.com>

Signed-off-by: Karol Herbst <karolher...@gmail.com>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index e2c41a0e264..2f7dbd9519f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -853,6 +853,8 @@ void
 CodeEmitterNVC0::emitNOT(Instruction *i)
 {
assert(i->encSize == 8);
+   if (i->getPredicate())
+  i->moveSources(1, 1);
i->setSrc(1, i->src(0));
emitForm_A(i, HEX64(6800, 01c3));
 }
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v5 18/21] clover: Handle CL_PROGRAM_IL in clGetProgramInfo

2018-03-26 Thread Karol Herbst
Reviewed-by: Karol Herbst <kher...@redhat.com>

On Sun, Mar 25, 2018 at 8:02 PM, Pierre Moreau <pierre.mor...@free.fr> wrote:
> Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> ---
>  src/gallium/state_trackers/clover/api/program.cpp | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/src/gallium/state_trackers/clover/api/program.cpp 
> b/src/gallium/state_trackers/clover/api/program.cpp
> index 3c7e56efb9..851a212b99 100644
> --- a/src/gallium/state_trackers/clover/api/program.cpp
> +++ b/src/gallium/state_trackers/clover/api/program.cpp
> @@ -429,6 +429,13 @@ clGetProgramInfo(cl_program d_prog, cl_program_info 
> param,
>buf.as_string() = prog.source();
>break;
>
> +   case CL_PROGRAM_IL:
> +  if (prog.has_il)
> + buf.as_vector() = prog.il();
> +  else if (r_size)
> + *r_size = 0u;
> +  break;
> +
> case CL_PROGRAM_BINARY_SIZES:
>buf.as_vector() = map([&](const device ) {
>  return prog.build(dev).binary.size();
> --
> 2.16.3
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 06/19] RFC: nir/vtn: "raw" pointer support

2018-03-25 Thread Karol Herbst
On Sun, Mar 25, 2018 at 2:18 PM, Rob Clark <robdcl...@gmail.com> wrote:
> On Sun, Mar 25, 2018 at 6:35 AM, Karol Herbst <kher...@redhat.com> wrote:
>> On Sun, Mar 25, 2018 at 12:18 AM, Rob Clark <robdcl...@gmail.com> wrote:
>>> On Fri, Mar 23, 2018 at 5:18 PM, Jason Ekstrand <ja...@jlekstrand.net> 
>>> wrote:
>>>> On Fri, Mar 23, 2018 at 2:15 PM, Karol Herbst <kher...@redhat.com> wrote:
>>>>>
>>>>> On Fri, Mar 23, 2018 at 10:07 PM, Jason Ekstrand <ja...@jlekstrand.net>
>>>>> wrote:
>>>>> > +list
>>>>> >
>>>>> > On Fri, Mar 23, 2018 at 1:45 PM, Karol Herbst <kher...@redhat.com>
>>>>> > wrote:
>>>>> >>
>>>>> >> On Fri, Mar 23, 2018 at 9:30 PM, Jason Ekstrand <ja...@jlekstrand.net>
>>>>> >> wrote:
>>>>> >> > As I've been rewriting core NIR deref handling, I've been thinking
>>>>> >> > about
>>>>> >> > this problem quite a bit.  One objective I have is to actually make
>>>>> >> > UBO
>>>>> >> > and
>>>>> >> > SSBO access go through derefs instead of just being an offset and
>>>>> >> > index
>>>>> >> > so
>>>>> >> > that the compiler can better reason about them.  In particular, I
>>>>> >> > want
>>>>> >> > to be
>>>>> >> > able to start doing load/store elimination on SSBOs, SLM, and
>>>>> >> > whatever
>>>>> >> > CL
>>>>> >> > has which would be great for everyone's compute performance (GL,
>>>>> >> > Vulkan,
>>>>> >> > CL,
>>>>> >> > etc.).
>>>>> >> >
>>>>> >> > I would be lying if I said I had a full plan but I do have part of a
>>>>> >> > plan.
>>>>> >> > In my patch which adds the deref instructions, I add a new "cast"
>>>>> >> > deref
>>>>> >> > type
>>>>> >> > which takes an arbitrary value as it's source and kicks out a deref
>>>>> >> > with
>>>>> >> > a
>>>>> >> > type.  Whenever we discover that the source of the cast is actually
>>>>> >> > another
>>>>> >> > deref which is compatible (same type etc.), copy propagation gets rid
>>>>> >> > of
>>>>> >> > the
>>>>> >> > cast for you.  The idea is that, instead of doing a
>>>>> >> > load_raw(raw_ptr),
>>>>> >> > you
>>>>> >> > would do a load((type *)raw_ptr).
>>>>> >> >
>>>>> >> > Right now, most of the core NIR optimizations will throw a fit if
>>>>> >> > they
>>>>> >> > ever
>>>>> >> > see a cast.  This is intentional because it requires us to manually
>>>>> >> > go
>>>>> >> > through and handle casts.  This would mean that, at the moment, you
>>>>> >> > would
>>>>> >> > have to lower to load_raw intrinsics almost immediately after coming
>>>>> >> > out
>>>>> >> > of
>>>>> >> > SPIR-V.
>>>>> >> >
>>>>> >>
>>>>> >> Well it gets more fun with OpenCL 2.0 where you can have generic
>>>>> >> pointer where you only know the type at creation type. You can also
>>>>> >> declare generic pointers as function inputs in a way, that you never
>>>>> >> actually know from where you have to load if you only have that one
>>>>> >> function. So the actual load operation depends on when you create the
>>>>> >> initial pointer variable (you can cast from X to generic, but not the
>>>>> >> other way around).
>>>>> >>
>>>>> >> Which in the end means you can end up with load(generic_ptr) and only
>>>>> >> following the chain up to it's creation (with function inlining in
>>>>> >> mind) you know the actual memory target.
>>>>> >
>>>>> >
>>>>>

Re: [Mesa-dev] [PATCH v3 06/19] RFC: nir/vtn: "raw" pointer support

2018-03-25 Thread Karol Herbst
On Sun, Mar 25, 2018 at 12:18 AM, Rob Clark <robdcl...@gmail.com> wrote:
> On Fri, Mar 23, 2018 at 5:18 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
>> On Fri, Mar 23, 2018 at 2:15 PM, Karol Herbst <kher...@redhat.com> wrote:
>>>
>>> On Fri, Mar 23, 2018 at 10:07 PM, Jason Ekstrand <ja...@jlekstrand.net>
>>> wrote:
>>> > +list
>>> >
>>> > On Fri, Mar 23, 2018 at 1:45 PM, Karol Herbst <kher...@redhat.com>
>>> > wrote:
>>> >>
>>> >> On Fri, Mar 23, 2018 at 9:30 PM, Jason Ekstrand <ja...@jlekstrand.net>
>>> >> wrote:
>>> >> > As I've been rewriting core NIR deref handling, I've been thinking
>>> >> > about
>>> >> > this problem quite a bit.  One objective I have is to actually make
>>> >> > UBO
>>> >> > and
>>> >> > SSBO access go through derefs instead of just being an offset and
>>> >> > index
>>> >> > so
>>> >> > that the compiler can better reason about them.  In particular, I
>>> >> > want
>>> >> > to be
>>> >> > able to start doing load/store elimination on SSBOs, SLM, and
>>> >> > whatever
>>> >> > CL
>>> >> > has which would be great for everyone's compute performance (GL,
>>> >> > Vulkan,
>>> >> > CL,
>>> >> > etc.).
>>> >> >
>>> >> > I would be lying if I said I had a full plan but I do have part of a
>>> >> > plan.
>>> >> > In my patch which adds the deref instructions, I add a new "cast"
>>> >> > deref
>>> >> > type
>>> >> > which takes an arbitrary value as it's source and kicks out a deref
>>> >> > with
>>> >> > a
>>> >> > type.  Whenever we discover that the source of the cast is actually
>>> >> > another
>>> >> > deref which is compatible (same type etc.), copy propagation gets rid
>>> >> > of
>>> >> > the
>>> >> > cast for you.  The idea is that, instead of doing a
>>> >> > load_raw(raw_ptr),
>>> >> > you
>>> >> > would do a load((type *)raw_ptr).
>>> >> >
>>> >> > Right now, most of the core NIR optimizations will throw a fit if
>>> >> > they
>>> >> > ever
>>> >> > see a cast.  This is intentional because it requires us to manually
>>> >> > go
>>> >> > through and handle casts.  This would mean that, at the moment, you
>>> >> > would
>>> >> > have to lower to load_raw intrinsics almost immediately after coming
>>> >> > out
>>> >> > of
>>> >> > SPIR-V.
>>> >> >
>>> >>
>>> >> Well it gets more fun with OpenCL 2.0 where you can have generic
>>> >> pointer where you only know the type at creation type. You can also
>>> >> declare generic pointers as function inputs in a way, that you never
>>> >> actually know from where you have to load if you only have that one
>>> >> function. So the actual load operation depends on when you create the
>>> >> initial pointer variable (you can cast from X to generic, but not the
>>> >> other way around).
>>> >>
>>> >> Which in the end means you can end up with load(generic_ptr) and only
>>> >> following the chain up to it's creation (with function inlining in
>>> >> mind) you know the actual memory target.
>>> >
>>> >
>>> > Yup.  And there will always be crazy cases where you can't actually
>>> > follow
>>> > it and you have to emit a pile of code to load different ways depending
>>> > on
>>> > some bits somewhere that tell you how to load it.  I'm well aware of the
>>> > insanity. :-)  This is part of the reason why I'm glad I'm not trying to
>>> > write an OpenCL 2.0 driver.
>>> >
>>> > This insanity is exactly why I'm suggesting the pointer casting.  Sure,
>>> > you
>>> > may not know the data type until the actual load.  In that case, you end
>>> > up
>>> > with the cast being right before the load.  If you don't know the
>>> > storage
>>> 

Re: [Mesa-dev] [PATCH v3 06/19] RFC: nir/vtn: "raw" pointer support

2018-03-23 Thread Karol Herbst
On Fri, Mar 23, 2018 at 10:07 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> +list
>
> On Fri, Mar 23, 2018 at 1:45 PM, Karol Herbst <kher...@redhat.com> wrote:
>>
>> On Fri, Mar 23, 2018 at 9:30 PM, Jason Ekstrand <ja...@jlekstrand.net>
>> wrote:
>> > As I've been rewriting core NIR deref handling, I've been thinking about
>> > this problem quite a bit.  One objective I have is to actually make UBO
>> > and
>> > SSBO access go through derefs instead of just being an offset and index
>> > so
>> > that the compiler can better reason about them.  In particular, I want
>> > to be
>> > able to start doing load/store elimination on SSBOs, SLM, and whatever
>> > CL
>> > has which would be great for everyone's compute performance (GL, Vulkan,
>> > CL,
>> > etc.).
>> >
>> > I would be lying if I said I had a full plan but I do have part of a
>> > plan.
>> > In my patch which adds the deref instructions, I add a new "cast" deref
>> > type
>> > which takes an arbitrary value as its source and kicks out a deref with
>> > a
>> > type.  Whenever we discover that the source of the cast is actually
>> > another
>> > deref which is compatible (same type etc.), copy propagation gets rid of
>> > the
>> > cast for you.  The idea is that, instead of doing a load_raw(raw_ptr),
>> > you
>> > would do a load((type *)raw_ptr).
>> >
>> > Right now, most of the core NIR optimizations will throw a fit if they
>> > ever
>> > see a cast.  This is intentional because it requires us to manually go
>> > through and handle casts.  This would mean that, at the moment, you
>> > would
>> > have to lower to load_raw intrinsics almost immediately after coming out
>> > of
>> > SPIR-V.
>> >
>>
>> Well it gets more fun with OpenCL 2.0 where you can have generic
>> pointer where you only know the type at creation time. You can also
>> declare generic pointers as function inputs in a way, that you never
>> actually know from where you have to load if you only have that one
>> function. So the actual load operation depends on when you create the
>> initial pointer variable (you can cast from X to generic, but not the
>> other way around).
>>
>> Which in the end means you can end up with load(generic_ptr) and only
>> following the chain up to its creation (with function inlining in
>> mind) you know the actual memory target.
>
>
> Yup.  And there will always be crazy cases where you can't actually follow
> it and you have to emit a pile of code to load different ways depending on
> some bits somewhere that tell you how to load it.  I'm well aware of the
> insanity. :-)  This is part of the reason why I'm glad I'm not trying to
> write an OpenCL 2.0 driver.
>
> This insanity is exactly why I'm suggesting the pointer casting.  Sure, you
> may not know the data type until the actual load.  In that case, you end up
> with the cast being right before the load.  If you don't know the storage
> class, maybe you have to switch and do multiple casts based on some bits.
> Alternatively, if you don't know the storage class, we can just let the
> deref mode be 0 for "I don't know". or maybe multiple bits for "these are
> the things it might be".  In any case, I think we can handle it.
>

there shouldn't be a situation where we don't know, except when you
don't inline all functions. I think Rob had the idea of fat pointers
where a pointer is a vec2 and the 2nd component contains the actual
pointer type and you end up with a switch over the type to get the
correct storage class. And if the compiler inlines all functions, it
should be able to optimize that switch away.

> It's insane but we need some sort of structure to be able to reason about
> the insanity.  Immediately lowering everything to load_raw is a good way to
> get a driver off the ground.  What it's not so good for is making an
> optimizing compiler that can reason about these crazy pointers and actually
> optimize them.  Lest I sound too negative, I'm 100% fine with taking a short
> path to getting something working now so long as it doesn't cloud up our
> ability to do better in the future.
>
>>
>> And I think the issue here is not that it is some kind of raw pointer
>> in the patch, but more like an unbound/physical pointer, which doesn't
>> relate to any variable. It is just a value like any other int/long as
>> well.
>>
>> > On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst <

Re: [Mesa-dev] [PATCH v3 15/19] nir: use load_local_group_size

2018-03-23 Thread Karol Herbst
On Fri, Mar 23, 2018 at 9:18 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst <kher...@redhat.com> wrote:
>>
>> From: Rob Clark <robdcl...@gmail.com>
>>
>> If local_size is not known at compile time, which is the case with
>> clover, use the load_local_group_size intrinsic instead.
>>
>> Signed-off-by: Karol Herbst <kher...@redhat.com>
>> ---
>>  src/compiler/nir/nir_lower_system_values.c | 25 +
>>  1 file changed, 17 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/compiler/nir/nir_lower_system_values.c
>> b/src/compiler/nir/nir_lower_system_values.c
>> index d507c28f421..ff4e09c8e61 100644
>> --- a/src/compiler/nir/nir_lower_system_values.c
>> +++ b/src/compiler/nir/nir_lower_system_values.c
>> @@ -57,19 +57,28 @@ convert_block(nir_block *block, nir_builder *b)
>>*"The value of gl_GlobalInvocationID is equal to
>>*gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
>>*/
>> + nir_ssa_def *local_size_def;
>>
>> - nir_const_value local_size;
>> - memset(_size, 0, sizeof(local_size));
>> - local_size.u64[0] = b->shader->info.cs.local_size[0];
>> - local_size.u64[1] = b->shader->info.cs.local_size[1];
>> - local_size.u64[2] = b->shader->info.cs.local_size[2];
>> + /* if local_size[] is already known, use that, otherwise use
>> +  * load_local_group_size intrinsic:
>> +  */
>> + if (b->shader->info.cs.local_size[0]) {
>> +nir_const_value local_size;
>> +memset(_size, 0, sizeof(local_size));
>> +local_size.u64[0] = b->shader->info.cs.local_size[0];
>> +local_size.u64[1] = b->shader->info.cs.local_size[1];
>> +local_size.u64[2] = b->shader->info.cs.local_size[2];
>> +
>> +local_size_def = nir_build_imm(b, 3, bit_size, local_size);
>>
>> + } else {
>> +local_size_def = nir_load_local_group_size(b, bit_size);
>> + }
>
>
> I commented on an earlier patch about how the approach to building the
> 32/64-bit immediates is wrong.
>

oh right, I totally forgot about that.

> Setting that aside, this patch looks fine to me in principle.  There's a
> part of me that doesn't like using cs.local_size[0] being the trigger but I
> think it's probably ok.  Maybe we should assert that cs_local_size is either
> all zero (second case) or all not zero (first case) just to be safe.
>

I think the main problem here is, that even with OpenCL kernels you
can specify it, but then overwrite it at runtime again. So yes I
agree, that we need something better here.

>>
>>
>>   nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
>>   nir_ssa_def *local_id = nir_load_local_invocation_id(b,
>> bit_size);
>>
>> - sysval = nir_iadd(b, nir_imul(b, group_id,
>> -   nir_build_imm(b, 3, bit_size,
>> local_size)),
>> -  local_id);
>> + sysval = nir_iadd(b, nir_imul(b, group_id, local_size_def),
>> +   local_id);
>>   break;
>>}
>>
>> --
>> 2.14.3
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 12/19] nir: specify bit_size when loading system values

2018-03-23 Thread Karol Herbst
On Fri, Mar 23, 2018 at 9:15 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst <kher...@redhat.com> wrote:
>>
>> With OpenCL the size of some system value depends on the Physical model
>> chosen, so we need a way to load any system value as 32 or 64 bit.
>>
>> Signed-off-by: Karol Herbst <kher...@redhat.com>
>> ---
>>  src/compiler/nir/nir_builder.h   | 10 +---
>>  src/compiler/nir/nir_lower_alpha_test.c  |  2 +-
>>  src/compiler/nir/nir_lower_clip.c|  3 ++-
>>  src/compiler/nir/nir_lower_subgroups.c   |  8 +++---
>>  src/compiler/nir/nir_lower_system_values.c   | 31
>> 
>>  src/compiler/nir/nir_lower_two_sided_color.c |  2 +-
>>  src/compiler/nir/nir_lower_wpos_center.c |  2 +-
>>  src/compiler/spirv/vtn_subgroup.c|  2 +-
>>  src/gallium/auxiliary/nir/tgsi_to_nir.c  |  3 ++-
>>  src/intel/blorp/blorp_blit.c |  2 +-
>>  src/intel/blorp/blorp_clear.c|  2 +-
>>  src/intel/compiler/brw_nir_lower_cs_intrinsics.c |  6 ++---
>>  src/mesa/drivers/dri/i965/brw_tcs.c  |  2 +-
>>  13 files changed, 40 insertions(+), 35 deletions(-)
>>
>> diff --git a/src/compiler/nir/nir_builder.h
>> b/src/compiler/nir/nir_builder.h
>> index 36e0ae3ac63..4e93cd08169 100644
>> --- a/src/compiler/nir/nir_builder.h
>> +++ b/src/compiler/nir/nir_builder.h
>> @@ -612,13 +612,14 @@ nir_copy_var(nir_builder *build, nir_variable *dest,
>> nir_variable *src)
>>
>>  /* Generic builder for system values. */
>>  static inline nir_ssa_def *
>> -nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
>> +nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index,
>> +  unsigned bit_size)
>>  {
>> nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader,
>> op);
>> load->num_components = nir_intrinsic_infos[op].dest_components;
>> load->const_index[0] = index;
>> nir_ssa_dest_init(>instr, >dest,
>> - nir_intrinsic_infos[op].dest_components, 32, NULL);
>> + nir_intrinsic_infos[op].dest_components, bit_size,
>> NULL);
>> nir_builder_instr_insert(build, >instr);
>> return >dest.ssa;
>>  }
>> @@ -630,9 +631,10 @@ nir_load_system_value(nir_builder *build,
>> nir_intrinsic_op op, int index)
>>
>>  #define DEFINE_SYSTEM_VALUE(name)
>> \
>> static inline nir_ssa_def *
>> \
>> -   nir_load_##name(nir_builder *build)
>> \
>> +   nir_load_##name(nir_builder *build, unsigned bit_size)
>> \
>
>
> I was really hoping that this change wouldn't touch every single intrinsic
> helper.  Maybe with Rob's python-based intrinsics table we can do something
> better.
>

I was kind of thinking of declaring builtins as either 32, 64 or 32/64
bit and just generate a function with a bit_size argument for the
latter maybe, but I think we really want to do that in python and not
with C preprocessor macros :)

>>
>> {
>> \
>> -  return nir_load_system_value(build, nir_intrinsic_load_##name, 0);
>> \
>> +  return nir_load_system_value(build, nir_intrinsic_load_##name, 0,
>> \
>> +   bit_size);
>> \
>> }
>>
>>  #include "nir_intrinsics.h"
>> diff --git a/src/compiler/nir/nir_lower_alpha_test.c
>> b/src/compiler/nir/nir_lower_alpha_test.c
>> index 6bf9ff142df..29f91ab9428 100644
>> --- a/src/compiler/nir/nir_lower_alpha_test.c
>> +++ b/src/compiler/nir/nir_lower_alpha_test.c
>> @@ -92,7 +92,7 @@ nir_lower_alpha_test(nir_shader *shader, enum
>> compare_func func,
>>
>> nir_ssa_def *condition =
>>nir_compare_func(, func,
>> -   alpha, nir_load_alpha_ref_float());
>> +   alpha, nir_load_alpha_ref_float(,
>> 32));
>>
>> nir_intrinsic_instr *discard =
>>nir_intrinsic_instr_create(b.shader,
>> diff --git a/src/compiler/nir/nir_lower_clip.c
>> b/src/compiler/nir/nir_lower_clip.c
>> index ea12f51a7bb..b9a91f7d40b 100644
>> --- a/src/compiler/nir/nir_lower_clip.c
>> +++ b/src/compiler/nir/nir_lower_clip.c
>> @@ -174,7 +174,8 @@ lower_clip_vs(nir_function_impl *impl, unsigned
>> ucp_enables,
>> for (int plane = 0; plane < MAX_CLIP_P

[Mesa-dev] [PATCH v3 16/19] nir: add load_kernel_param

2018-03-23 Thread Karol Herbst
OpenCL kernels have parameters (see pipe_grid_info::input), and so we
need a way to access them.

The offset source is the offset of the parameter to load in the kernel input
buffer.

v2: improve commit message
remove BASE
split lower_io changes into separate commit

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/nir/nir_intrinsics.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 6597eaea87b..fb8d53b3c0d 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -495,6 +495,8 @@ LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | 
NIR_INTRINSIC_CAN_REOR
 LOAD(input, 1, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE | 
NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { vertex, offset }. const_index[] = { base, component } */
 LOAD(per_vertex_input, 2, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE 
| NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { address }. No const_index */
+LOAD(kernel_param, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | 
NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { barycoord, offset }. const_index[] = { base, component } */
 INTRINSIC(load_interpolated_input, 2, ARR(2, 1), true, 0, 0,
   2, BASE, COMPONENT, xx,
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 15/19] nir: use load_local_group_size

2018-03-23 Thread Karol Herbst
From: Rob Clark <robdcl...@gmail.com>

If local_size is not known at compile time, which is the case with
clover, use the load_local_group_size intrinsic instead.

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/nir/nir_lower_system_values.c | 25 +
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/compiler/nir/nir_lower_system_values.c 
b/src/compiler/nir/nir_lower_system_values.c
index d507c28f421..ff4e09c8e61 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -57,19 +57,28 @@ convert_block(nir_block *block, nir_builder *b)
   *"The value of gl_GlobalInvocationID is equal to
   *gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
   */
+ nir_ssa_def *local_size_def;
 
- nir_const_value local_size;
- memset(_size, 0, sizeof(local_size));
- local_size.u64[0] = b->shader->info.cs.local_size[0];
- local_size.u64[1] = b->shader->info.cs.local_size[1];
- local_size.u64[2] = b->shader->info.cs.local_size[2];
+ /* if local_size[] is already known, use that, otherwise use
+  * load_local_group_size intrinsic:
+  */
+ if (b->shader->info.cs.local_size[0]) {
+nir_const_value local_size;
+memset(_size, 0, sizeof(local_size));
+local_size.u64[0] = b->shader->info.cs.local_size[0];
+local_size.u64[1] = b->shader->info.cs.local_size[1];
+local_size.u64[2] = b->shader->info.cs.local_size[2];
+
+local_size_def = nir_build_imm(b, 3, bit_size, local_size);
+ } else {
+local_size_def = nir_load_local_group_size(b, bit_size);
+ }
 
  nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
  nir_ssa_def *local_id = nir_load_local_invocation_id(b, bit_size);
 
- sysval = nir_iadd(b, nir_imul(b, group_id,
-   nir_build_imm(b, 3, bit_size, 
local_size)),
-  local_id);
+ sysval = nir_iadd(b, nir_imul(b, group_id, local_size_def),
+   local_id);
  break;
   }
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 17/19] RFC nir/lower_io: lower kernel entry param load_vars to load_kernel_param

2018-03-23 Thread Karol Herbst
For OpenCL kernels we have an input buffer where most of the parameters are
stored. For this we have to keep track of alignment and padding rules to
correctly identify the offset of each parameter inside that buffer.

For this we can just rely on the new cl_size and cl_alignment glsl_type
functions.

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/nir/nir_lower_io.c | 39 ---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index df91febd68d..ed8e361651c 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -39,6 +39,7 @@ struct lower_io_state {
int (*type_size)(const struct glsl_type *type);
nir_variable_mode modes;
nir_lower_io_options options;
+   unsigned *offsets;
 };
 
 void
@@ -159,7 +160,8 @@ lower_load(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
nir_ssa_def *vertex_index, nir_ssa_def *offset,
unsigned component)
 {
-   const nir_shader *nir = state->builder.shader;
+   nir_builder *b = >builder;
+   nir_shader *nir = b->shader;
nir_variable *var = intrin->variables[0]->var;
nir_variable_mode mode = var->data.mode;
nir_ssa_def *barycentric = NULL;
@@ -199,6 +201,11 @@ lower_load(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
case nir_var_shared:
   op = nir_intrinsic_load_shared;
   break;
+   case nir_var_param:
+  if (nir_cf_node_get_function(>instr.block->cf_node) == 
nir_shader_get_entrypoint(nir)) {
+ op = nir_intrinsic_load_kernel_param;
+ break;
+  }
default:
   unreachable("Unknown variable mode");
}
@@ -207,7 +214,9 @@ lower_load(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
   nir_intrinsic_instr_create(state->builder.shader, op);
load->num_components = intrin->num_components;
 
-   nir_intrinsic_set_base(load, var->data.driver_location);
+   if (op != nir_intrinsic_load_kernel_param)
+  nir_intrinsic_set_base(load, var->data.driver_location);
+
if (mode == nir_var_shader_in || mode == nir_var_shader_out)
   nir_intrinsic_set_component(load, component);
 
@@ -220,6 +229,8 @@ lower_load(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
} else if (barycentric) {
   load->src[0] = nir_src_for_ssa(barycentric);
   load->src[1] = nir_src_for_ssa(offset);
+   } else if (op == nir_intrinsic_load_kernel_param) {
+  load->src[0] = nir_src_for_ssa(nir_imm_int(b, 
state->offsets[var->data.location]));
} else {
   load->src[0] = nir_src_for_ssa(offset);
}
@@ -407,7 +418,8 @@ nir_lower_io_block(nir_block *block,
   if (mode != nir_var_shader_in &&
   mode != nir_var_shader_out &&
   mode != nir_var_shared &&
-  mode != nir_var_uniform)
+  mode != nir_var_uniform &&
+  mode != nir_var_param)
  continue;
 
   b->cursor = nir_before_instr(instr);
@@ -481,6 +493,22 @@ nir_lower_io_block(nir_block *block,
return progress;
 }
 
+static void
+nir_lower_io_calc_param_offsets(struct lower_io_state *state,
+nir_function_impl *impl)
+{
+   state->offsets = ralloc_array(state->builder.shader, unsigned,
+ impl->num_params);
+   state->offsets[0] = 0;
+   for (int i = 0; i < impl->num_params; ++i) {
+  nir_variable *var = impl->params[i];
+  state->offsets[i] = align(state->offsets[i], 
glsl_get_cl_alignment(var->type));
+  if (i + 1 < impl->num_params)
+ state->offsets[i + 1] = state->offsets[i] + 
glsl_get_cl_size(var->type);
+   }
+   /* Don't free: lower_load() still reads offsets; the shader owns them. */
+}
+
 static bool
 nir_lower_io_impl(nir_function_impl *impl,
   nir_variable_mode modes,
@@ -495,6 +523,11 @@ nir_lower_io_impl(nir_function_impl *impl,
state.type_size = type_size;
state.options = options;
 
+   if (modes & nir_var_param &&
+   impl == nir_shader_get_entrypoint(state.builder.shader) &&
+   impl->num_params)
+  nir_lower_io_calc_param_offsets(, impl);
+
nir_foreach_block(block, impl) {
   progress |= nir_lower_io_block(block, );
}
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 19/19] RFC: nir/vtn: member in struct deref

2018-03-23 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/spirv/vtn_private.h   |  5 +++--
 src/compiler/spirv/vtn_variables.c | 14 +++---
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index 510c12faa87..45b581bf80e 100644
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -700,12 +700,13 @@ void vtn_local_store(struct vtn_builder *b, struct 
vtn_ssa_value *src,
  nir_deref_var *dest);
 
 struct vtn_ssa_value *vtn_pointer_load(struct vtn_builder *b,
-   struct vtn_pointer *ptr);
+   struct vtn_pointer *ptr,
+   struct vtn_type *);
 void vtn_pointer_store(struct vtn_builder *b, struct vtn_ssa_value *src,
struct vtn_pointer *ptr);
 
 struct vtn_ssa_value *
-vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src);
+vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src, struct 
vtn_type *);
 
 void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
 struct vtn_pointer *dest);
diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 6cf1a63f8c9..76b38b85e80 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -739,9 +739,9 @@ vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value 
*src,
 }
 
 struct vtn_ssa_value *
-vtn_pointer_load(struct vtn_builder *b, struct vtn_pointer *ptr)
+vtn_pointer_load(struct vtn_builder *b, struct vtn_pointer *ptr, struct 
vtn_type *res_type)
 {
-   const struct glsl_type *type = ptr->type->type;
+   const struct glsl_type *type = res_type->type;
struct vtn_ssa_value *val = vtn_create_ssa_value(b, type);
nir_intrinsic_op op = nir_intrinsic_load_global;
 
@@ -1207,7 +1207,7 @@ _vtn_variable_load_store(struct vtn_builder *b, bool load,
   * with it.  Just directly generate load/store_global intrinsics:
   */
  if (load) {
-*inout = vtn_pointer_load(b, ptr);
+*inout = vtn_pointer_load(b, ptr, ptr->type);
  } else {
 vtn_pointer_store(b, *inout, ptr);
  }
@@ -1244,12 +1244,12 @@ _vtn_variable_load_store(struct vtn_builder *b, bool 
load,
 }
 
 struct vtn_ssa_value *
-vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src)
+vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src, struct 
vtn_type *type)
 {
if (vtn_pointer_is_external_block(b, src)) {
   return vtn_block_load(b, src);
} else if (!src->var) {
-  return vtn_pointer_load(b, src);
+  return vtn_pointer_load(b, src, type);
} else {
   struct vtn_ssa_value *val = NULL;
   _vtn_variable_load_store(b, true, src, );
@@ -1298,7 +1298,7 @@ _vtn_variable_copy(struct vtn_builder *b, struct 
vtn_pointer *dest,
* ensure that matrices get loaded in the optimal way even if they
* are storred row-major in a UBO.
*/
-  vtn_variable_store(b, vtn_variable_load(b, src), dest);
+  vtn_variable_store(b, vtn_variable_load(b, src, src->type), dest);
   return;
 
case GLSL_TYPE_ARRAY:
@@ -2322,7 +2322,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
  return;
   }
 
-  vtn_push_ssa(b, w[2], res_type, vtn_variable_load(b, src));
+  vtn_push_ssa(b, w[2], res_type, vtn_variable_load(b, src, res_type));
   break;
}
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 14/19] nir/vtn/opencl: support fma

2018-03-23 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/spirv/vtn_opencl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/spirv/vtn_opencl.c b/src/compiler/spirv/vtn_opencl.c
index 3c5ecd22452..723a7edf9c2 100644
--- a/src/compiler/spirv/vtn_opencl.c
+++ b/src/compiler/spirv/vtn_opencl.c
@@ -58,6 +58,7 @@ static nir_op
 nir_alu_op_for_opencl_opcode(struct vtn_builder *b, enum OpenCLstd opcode)
 {
switch (opcode) {
+   case Fma: return nir_op_ffma;
case SHadd: return nir_op_ihadd;
case UHadd: return nir_op_uhadd;
default:
@@ -236,6 +237,7 @@ vtn_handle_opencl_instruction(struct vtn_builder *b, 
uint32_t ext_opcode,
switch (ext_opcode) {
case SHadd:
case UHadd:
+   case Fma:
   handle_instr(b, ext_opcode, w, count, handle_alu);
   return true;
case Vloadn:
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 09/19] nir/vtn: initial OpenCL.std extension

2018-03-23 Thread Karol Herbst
From: Rob Clark <robdcl...@gmail.com>

Not complete, mostly just adding things as I encounter them in CTS.  But
not getting far enough yet to hit most of the OpenCL.std instructions.

v2: update hadd definition (Karol Herbst <kher...@redhat.com>)

Signed-off-by: Rob Clark <robdcl...@gmail.com>
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/nir/meson.build  |   1 +
 src/compiler/nir/nir_opcodes.py   |   3 +-
 src/compiler/spirv/spirv_to_nir.c |   2 +
 src/compiler/spirv/vtn_opencl.c   | 266 ++
 src/compiler/spirv/vtn_private.h  |   3 +
 5 files changed, 274 insertions(+), 1 deletion(-)
 create mode 100644 src/compiler/spirv/vtn_opencl.c

diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index a70c236b958..213a139a1b8 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -192,6 +192,7 @@ files_libnir = files(
   '../spirv/vtn_amd.c',
   '../spirv/vtn_cfg.c',
   '../spirv/vtn_glsl450.c',
+  '../spirv/vtn_opencl.c',
   '../spirv/vtn_private.h',
   '../spirv/vtn_subgroup.c',
   '../spirv/vtn_variables.c',
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 65d13200624..86fd6b6d68e 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -768,4 +768,5 @@ dst.z = src2.x;
 dst.w = src3.x;
 """)
 
-
+binop("ihadd", tint, commutative, "(src0 >> 1) + (src1 >> 1) + (src0 & src1 & 
1)")
+binop("uhadd", tuint, commutative, "(src0 >> 1) + (src1 >> 1) + (src0 & src1 & 
1)")
diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 3acb3fc0b42..6a16d77a771 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -379,6 +379,8 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
   } else if ((strcmp((const char *)[2], "SPV_AMD_gcn_shader") == 0)
 && (b->options && b->options->caps.gcn_shader)) {
  val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
+  } else if (strcmp(ext, "OpenCL.std") == 0) {
+ val->ext_handler = vtn_handle_opencl_instruction;
   } else {
  vtn_fail("Unsupported extension: %s", ext);
   }
diff --git a/src/compiler/spirv/vtn_opencl.c b/src/compiler/spirv/vtn_opencl.c
new file mode 100644
index 000..3c5ecd22452
--- /dev/null
+++ b/src/compiler/spirv/vtn_opencl.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright © 2018 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Rob Clark (robdcl...@gmail.com)
+ */
+
+#include "vtn_private.h"
+#include "OpenCL.std.h"
+
+typedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b, enum OpenCLstd 
opcode,
+unsigned num_srcs, nir_ssa_def **srcs);
+
+static void
+handle_instr(struct vtn_builder *b, enum OpenCLstd opcode, const uint32_t *w,
+ unsigned count, nir_handler handler)
+{
+   const struct glsl_type *dest_type =
+  vtn_value(b, w[1], vtn_value_type_type)->type->type;
+
+   unsigned num_srcs = count - 5;
+   nir_ssa_def *srcs[3] = { NULL, };
+   vtn_assert(num_srcs <= ARRAY_SIZE(srcs));
+   for (unsigned i = 0; i < num_srcs; i++) {
+  srcs[i] = vtn_ssa_value(b, w[i + 5])->def;
+   }
+
+   nir_ssa_def *result = handler(b, opcode, num_srcs, srcs);
+   if (result) {
+  struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+  val->ssa = vtn_create_ssa_value(b, dest_type);
+  val->ssa->def = result;
+   } else {
+  vtn_assert(dest_type == glsl_void_type());
+   }
+}
+
+static nir_op
+nir_alu_op_for_opencl_opcode(struct vtn_builder *b, enum OpenCLstd opc

[Mesa-dev] [PATCH v3 18/19] nir: kernel entrypoints can have arguments

2018-03-23 Thread Karol Herbst
From: Rob Clark <robdcl...@gmail.com>

This assert is not valid for OpenCL kernels.

TODO can we somehow conditionally assert based on glsl vs cl??

Signed-off-by: Rob Clark <robdcl...@gmail.com>
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/nir/nir.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 6a51b7c4ab1..fedda73aa5e 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1974,7 +1974,6 @@ nir_shader_get_entrypoint(nir_shader *shader)
struct exec_node *func_node = exec_list_get_head(>functions);
nir_function *func = exec_node_data(nir_function, func_node, node);
assert(func->return_type == glsl_void_type());
-   assert(func->num_params == 0);
assert(func->impl);
return func->impl;
 }
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 10/19] RFC: nir/vtn: handle constant builtins from kernels

2018-03-23 Thread Karol Herbst
With SPIR-V it is perfectly fine to declare builtins as constants and have no
constant initializer on them.

This change seems to be able to break Vulkan shaders, so please check if this
is the correct thing here.

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/spirv/vtn_variables.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index af9222d6f4e..80fca6e8a32 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1580,7 +1580,6 @@ apply_var_decoration(struct vtn_builder *b, nir_variable 
*nir_var,
   nir_var->data.invariant = true;
   break;
case SpvDecorationConstant:
-  vtn_assert(nir_var->constant_initializer != NULL);
   nir_var->data.read_only = true;
   break;
case SpvDecorationNonReadable:
@@ -2031,6 +2030,7 @@ vtn_create_variable(struct vtn_builder *b, struct 
vtn_value *val,
case vtn_variable_mode_global:
case vtn_variable_mode_image:
case vtn_variable_mode_sampler:
+   case vtn_variable_mode_const:
   /* For these, we create the variable normally */
   var->var = rzalloc(b->shader, nir_variable);
   var->var->name = ralloc_strdup(var->var, val->name);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 06/19] RFC: nir/vtn: "raw" pointer support

2018-03-23 Thread Karol Herbst
From: Rob Clark <robdcl...@gmail.com>

An attempt to add physical pointer support to vtn.  I'm not totally
happy about the handling of logical pointers vs physical pointers.
So this is really more of an RFS (request for suggestions)

v2: treat vec3 types as vec4 when dereferencing

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/spirv/spirv_to_nir.c  |  87 ---
 src/compiler/spirv/vtn_private.h   |  20 ++-
 src/compiler/spirv/vtn_variables.c | 300 -
 3 files changed, 347 insertions(+), 60 deletions(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 334bcab9a82..d58a68f80ef 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -572,6 +572,7 @@ vtn_types_compatible(struct vtn_builder *b,
  vtn_types_compatible(b, t1->array_element, t2->array_element);
 
case vtn_base_type_pointer:
+   case vtn_base_type_raw_pointer:
   return vtn_types_compatible(b, t1->deref, t2->deref);
 
case vtn_base_type_struct:
@@ -609,6 +610,7 @@ vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
case vtn_base_type_matrix:
case vtn_base_type_array:
case vtn_base_type_pointer:
+   case vtn_base_type_raw_pointer:
case vtn_base_type_image:
case vtn_base_type_sampler:
case vtn_base_type_sampled_image:
@@ -939,6 +941,14 @@ vtn_type_layout_std430(struct vtn_builder *b, struct 
vtn_type *type,
   return type;
}
 
+   case vtn_base_type_raw_pointer: {
+  uint32_t comp_size = b->ptr_size / 8;
+  vtn_assert(comp_size);
+  *size_out = comp_size;
+  *align_out = comp_size;
+  return type;
+   }
+
case vtn_base_type_vector: {
   uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
   assert(type->length > 0 && type->length <= 4);
@@ -1003,6 +1013,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
   val->type->base_type = vtn_base_type_scalar;
   val->type->type = glsl_bool_type();
   val->type->length = 1;
+  val->type->stride = 4;
   break;
case SpvOpTypeInt: {
   int bit_size = w[2];
@@ -1025,6 +1036,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
  vtn_fail("Invalid int bit size");
   }
   val->type->length = 1;
+  val->type->stride = bit_size / 8;
   break;
}
 
@@ -1045,6 +1057,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
  vtn_fail("Invalid float bit size");
   }
   val->type->length = 1;
+  val->type->stride = bit_size / 8;
   break;
}
 
@@ -1061,6 +1074,10 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
   val->type->type = glsl_vector_type(glsl_get_base_type(base->type), 
elems);
   val->type->length = elems;
   val->type->stride = glsl_get_bit_size(base->type) / 8;
+  /* special case: vec3 is aligned to vec4 */
+  if (elems == 3)
+ elems = 4;
+  val->type->stride *= elems;
   val->type->array_element = base;
   break;
}
@@ -1138,7 +1155,11 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
 
   const char *name = val->name ? val->name : "struct";
 
-  val->type->type = glsl_struct_type(fields, num_fields, name, false);
+  val->type->type = glsl_struct_type(fields, num_fields, name,
+ val->type->packed);
+  // TODO stride for a struct only matters for kernel shaders, where
+  // cl_size is the right thing.. but still a bit ugly to hard-code.
+  val->type->stride = glsl_get_cl_size(val->type->type);
   break;
}
 
@@ -1167,25 +1188,47 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
   val->type->storage_class = storage_class;
   val->type->deref = deref_type;
 
-  if (storage_class == SpvStorageClassUniform ||
-  storage_class == SpvStorageClassStorageBuffer) {
- /* These can actually be stored to nir_variables and used as SSA
-  * values so they need a real glsl_type.
-  */
- val->type->type = glsl_vector_type(GLSL_TYPE_UINT, 2);
-  }
-
-  if (storage_class == SpvStorageClassWorkgroup &&
-  b->options->lower_workgroup_access_to_offsets) {
+  // XXX handling the "fake" glsl pointers vs "raw" pointers in kernel
+  // is a bit ugly..  need to understand how "pointers" are used in vk
+  // and figure out something better
+  if (storage_class == SpvStorageClassFunction ||
+  storage_class == SpvStorageClassUniformConstant ||
+  storage_class == SpvStorageClassWorkgroup ||
+  !b->kernel_mode) {
+ if (storage_class == SpvStorageClassUniform ||
+ storage_class == 

[Mesa-dev] [PATCH v3 08/19] nir/vtn: import OpenCL.std.h

2018-03-23 Thread Karol Herbst
From: Rob Clark <robdcl...@gmail.com>

Lightly edited to be valid 'C' code.

Is there a bug open to fix this upstream?

Signed-off-by: Rob Clark <robdcl...@gmail.com>
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/spirv/OpenCL.std.h | 211 
 1 file changed, 211 insertions(+)
 create mode 100644 src/compiler/spirv/OpenCL.std.h

diff --git a/src/compiler/spirv/OpenCL.std.h b/src/compiler/spirv/OpenCL.std.h
new file mode 100644
index 000..1e9e7fc8d8a
--- /dev/null
+++ b/src/compiler/spirv/OpenCL.std.h
@@ -0,0 +1,211 @@
+/*
+** Copyright (c) 2015-2017 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and/or associated documentation files (the "Materials"),
+** to deal in the Materials without restriction, including without limitation
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,
+** and/or sell copies of the Materials, and to permit persons to whom the
+** Materials are furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Materials.
+**
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ 
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+** IN THE MATERIALS.
+*/
+
+#ifndef OpenCLstd_H
+#define OpenCLstd_H
+
+enum OpenCLstd {
+
+// Section 2.1: Math extended instructions
+Acos = 0,
+Acosh = 1,
+Acospi = 2,
+Asin = 3,
+Asinh = 4,
+Asinpi = 5,
+Atan = 6,
+Atan2 = 7,
+Atanh = 8,
+Atanpi = 9,
+Atan2pi = 10,
+Cbrt = 11,
+Ceil = 12,
+Copysign = 13,
+Cos = 14,
+Cosh = 15,
+Cospi = 16,
+Erfc = 17,
+Erf = 18,
+Exp = 19,
+Exp2 = 20,
+Exp10 = 21,
+Expm1 = 22,
+Fabs = 23,
+Fdim = 24,
+Floor = 25,
+Fma = 26,
+Fmax = 27,
+Fmin = 28,
+Fmod = 29,
+Fract = 30, 
+Frexp = 31,
+Hypot = 32,
+Ilogb = 33,
+Ldexp = 34,
+Lgamma = 35,
+Lgamma_r = 36,
+Log = 37,
+Log2 = 38,
+Log10 = 39,
+Log1p = 40,
+Logb = 41,
+Mad = 42,
+Maxmag = 43,
+Minmag = 44,
+Modf = 45,
+Nan = 46,
+Nextafter = 47,
+Pow = 48,
+Pown = 49,
+Powr = 50,
+Remainder = 51,
+Remquo = 52,
+Rint = 53,
+Rootn = 54,
+Round = 55,
+Rsqrt = 56,
+Sin = 57,
+Sincos = 58,
+Sinh = 59,
+Sinpi = 60,
+Sqrt = 61,
+Tan = 62,
+Tanh = 63,
+Tanpi = 64,
+Tgamma = 65,
+Trunc = 66,
+Half_cos = 67,
+Half_divide = 68,
+Half_exp = 69,
+Half_exp2 = 70,
+Half_exp10 = 71,
+Half_log = 72,
+Half_log2 = 73,
+Half_log10 = 74,
+Half_powr = 75,
+Half_recip = 76,
+Half_rsqrt = 77,
+Half_sin = 78,
+Half_sqrt = 79,
+Half_tan = 80,
+Native_cos = 81,
+Native_divide = 82,
+Native_exp = 83,
+Native_exp2 = 84,
+Native_exp10 = 85,
+Native_log = 86,
+Native_log2 = 87,
+Native_log10 = 88,
+Native_powr = 89,
+Native_recip = 90,
+Native_rsqrt = 91,
+Native_sin = 92,
+Native_sqrt = 93,
+Native_tan = 94,
+
+// Section 2.2: Integer instructions
+SAbs = 141,
+SAbs_diff = 142,
+SAdd_sat = 143,
+UAdd_sat = 144,
+SHadd = 145,
+UHadd = 146,
+SRhadd = 147,
+URhadd = 148,
+SClamp = 149,
+UClamp = 150, 
+Clz = 151,
+Ctz = 152,
+SMad_hi = 153,
+UMad_sat = 154,
+SMad_sat = 155,
+SMax = 156,
+UMax = 157,
+SMin = 158,
+UMin = 159,
+SMul_hi = 160,
+Rotate = 161,
+SSub_sat = 162,
+USub_sat = 163,
+U_Upsample = 164,
+S_Upsample = 165,
+Popcount = 166,
+SMad24 = 167,
+UMad24 = 168,
+SMul24 = 169,
+UMul24 = 170,
+UAbs = 201,
+UAbs_diff = 202,
+UMul_hi = 203,
+UMad_hi = 204,
+
+// Section 2.3: Common instructions
+FClamp = 95,
+Degrees = 96,
+FMax_common = 97,
+FMin_common = 98, 
+Mix = 99,
+Radians = 100,
+Step = 101,
+Smoothstep = 102,
+Sign = 103,
+
+// Section 2.4: Geometric instructions
+Cross = 104,
+Distance = 105, 
+Length = 106,
+Normalize = 107,
+Fast_distance = 108,
+Fast_length = 109,
+Fast_

[Mesa-dev] [PATCH v3 12/19] nir: specify bit_size when loading system values

2018-03-23 Thread Karol Herbst
With OpenCL the size of some system values depends on the physical model
chosen, so we need a way to load any system value as 32 or 64 bit.

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/nir/nir_builder.h   | 10 +---
 src/compiler/nir/nir_lower_alpha_test.c  |  2 +-
 src/compiler/nir/nir_lower_clip.c|  3 ++-
 src/compiler/nir/nir_lower_subgroups.c   |  8 +++---
 src/compiler/nir/nir_lower_system_values.c   | 31 
 src/compiler/nir/nir_lower_two_sided_color.c |  2 +-
 src/compiler/nir/nir_lower_wpos_center.c |  2 +-
 src/compiler/spirv/vtn_subgroup.c|  2 +-
 src/gallium/auxiliary/nir/tgsi_to_nir.c  |  3 ++-
 src/intel/blorp/blorp_blit.c |  2 +-
 src/intel/blorp/blorp_clear.c|  2 +-
 src/intel/compiler/brw_nir_lower_cs_intrinsics.c |  6 ++---
 src/mesa/drivers/dri/i965/brw_tcs.c  |  2 +-
 13 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 36e0ae3ac63..4e93cd08169 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -612,13 +612,14 @@ nir_copy_var(nir_builder *build, nir_variable *dest, 
nir_variable *src)
 
 /* Generic builder for system values. */
 static inline nir_ssa_def *
-nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
+nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index,
+  unsigned bit_size)
 {
nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op);
load->num_components = nir_intrinsic_infos[op].dest_components;
load->const_index[0] = index;
nir_ssa_dest_init(>instr, >dest,
- nir_intrinsic_infos[op].dest_components, 32, NULL);
+ nir_intrinsic_infos[op].dest_components, bit_size, NULL);
nir_builder_instr_insert(build, >instr);
return >dest.ssa;
 }
@@ -630,9 +631,10 @@ nir_load_system_value(nir_builder *build, nir_intrinsic_op 
op, int index)
 
 #define DEFINE_SYSTEM_VALUE(name)\
static inline nir_ssa_def *   \
-   nir_load_##name(nir_builder *build)   \
+   nir_load_##name(nir_builder *build, unsigned bit_size)\
{ \
-  return nir_load_system_value(build, nir_intrinsic_load_##name, 0); \
+  return nir_load_system_value(build, nir_intrinsic_load_##name, 0,  \
+   bit_size);\
}
 
 #include "nir_intrinsics.h"
diff --git a/src/compiler/nir/nir_lower_alpha_test.c 
b/src/compiler/nir/nir_lower_alpha_test.c
index 6bf9ff142df..29f91ab9428 100644
--- a/src/compiler/nir/nir_lower_alpha_test.c
+++ b/src/compiler/nir/nir_lower_alpha_test.c
@@ -92,7 +92,7 @@ nir_lower_alpha_test(nir_shader *shader, enum compare_func 
func,
 
nir_ssa_def *condition =
   nir_compare_func(, func,
-   alpha, nir_load_alpha_ref_float());
+   alpha, nir_load_alpha_ref_float(, 32));
 
nir_intrinsic_instr *discard =
   nir_intrinsic_instr_create(b.shader,
diff --git a/src/compiler/nir/nir_lower_clip.c 
b/src/compiler/nir/nir_lower_clip.c
index ea12f51a7bb..b9a91f7d40b 100644
--- a/src/compiler/nir/nir_lower_clip.c
+++ b/src/compiler/nir/nir_lower_clip.c
@@ -174,7 +174,8 @@ lower_clip_vs(nir_function_impl *impl, unsigned ucp_enables,
for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
   if (ucp_enables & (1 << plane)) {
  nir_ssa_def *ucp =
-nir_load_system_value(, nir_intrinsic_load_user_clip_plane, 
plane);
+nir_load_system_value(, nir_intrinsic_load_user_clip_plane,
+  plane, 32);
 
  /* calculate clipdist[plane] - dot(ucp, cv): */
  clipdist[plane] = nir_fdot4(, ucp, cv);
diff --git a/src/compiler/nir/nir_lower_subgroups.c 
b/src/compiler/nir/nir_lower_subgroups.c
index 0d3c83b7951..7e910c013a9 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -190,7 +190,7 @@ static nir_ssa_def *
 lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
   bool lower_to_scalar)
 {
-   nir_ssa_def *index = nir_load_subgroup_invocation(b);
+   nir_ssa_def *index = nir_load_subgroup_invocation(b, 32);
switch (intrin->intrinsic) {
case nir_intrinsic_shuffle_xor:
   assert(intrin->src[1].is_ssa);
@@ -300,7 +300,7 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr 
*intrin,
   assert(options->subgroup_size <= 64);
   uint64_t group_mask = ~0ull

[Mesa-dev] [PATCH v3 03/19] glsl: add packed for struct types

2018-03-23 Thread Karol Herbst
We need this for OpenCL kernels because we have to apply C rules for alignment
and padding inside structs and for this we also have to know if a struct is
packed or not.

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/glsl_types.cpp   | 17 +++--
 src/compiler/glsl_types.h | 12 ++--
 src/compiler/nir_types.cpp|  5 +++--
 src/compiler/nir_types.h  |  3 ++-
 src/compiler/spirv/spirv_to_nir.c | 10 +-
 src/compiler/spirv/vtn_private.h  |  7 +++
 6 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index a73caa908bf..12a8a546938 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -90,11 +90,11 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type 
base_type,
 }
 
 glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
- const char *name) :
+ const char *name, bool packed) :
gl_type(0),
base_type(GLSL_TYPE_STRUCT), sampled_type(GLSL_TYPE_VOID),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0), packed(packed),
vector_elements(0), matrix_columns(0),
length(num_fields)
 {
@@ -1131,9 +1131,10 @@ glsl_type::record_key_hash(const void *a)
 const glsl_type *
 glsl_type::get_record_instance(const glsl_struct_field *fields,
unsigned num_fields,
-   const char *name)
+   const char *name,
+   bool packed)
 {
-   const glsl_type key(fields, num_fields, name);
+   const glsl_type key(fields, num_fields, name, packed);
 
mtx_lock(&glsl_type::hash_mutex);
 
@@ -1145,7 +1146,7 @@ glsl_type::get_record_instance(const glsl_struct_field 
*fields,
const struct hash_entry *entry = _mesa_hash_table_search(record_types,
 &key);
if (entry == NULL) {
-  const glsl_type *t = new glsl_type(fields, num_fields, name);
+  const glsl_type *t = new glsl_type(fields, num_fields, name, packed);
 
   entry = _mesa_hash_table_insert(record_types, t, (void *) t);
}
@@ -1153,6 +1154,7 @@ glsl_type::get_record_instance(const glsl_struct_field 
*fields,
assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_STRUCT);
assert(((glsl_type *) entry->data)->length == num_fields);
assert(strcmp(((glsl_type *) entry->data)->name, name) == 0);
+   assert(((glsl_type *) entry->data)->packed == packed);
 
mtx_unlock(&glsl_type::hash_mutex);
 
@@ -2262,6 +2264,8 @@ encode_type_to_blob(struct blob *blob, const glsl_type 
*type)
   if (type->is_interface()) {
  blob_write_uint32(blob, type->interface_packing);
  blob_write_uint32(blob, type->interface_row_major);
+  } else {
+ blob_write_uint32(blob, type->packed);
   }
   return;
case GLSL_TYPE_VOID:
@@ -2341,7 +2345,8 @@ decode_type_from_blob(struct blob_reader *blob)
  t = glsl_type::get_interface_instance(fields, num_fields, packing,
row_major, name);
   } else {
- t = glsl_type::get_record_instance(fields, num_fields, name);
+ unsigned packed = blob_read_uint32(blob);
+ t = glsl_type::get_record_instance(fields, num_fields, name, packed);
   }
 
   free(fields);
diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index 6982d52e392..34d03505ae8 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -164,6 +164,13 @@ struct glsl_type {
unsigned interface_packing:2;
unsigned interface_row_major:1;
 
+   /**
+* For \c GLSL_TYPE_STRUCT this specifies if the struct is packed or not.
+*
+* Only used for Compute kernels
+*/
+   unsigned packed:1;
+
 private:
glsl_type() : mem_ctx(NULL)
{
@@ -286,7 +293,8 @@ public:
 */
static const glsl_type *get_record_instance(const glsl_struct_field *fields,
   unsigned num_fields,
-  const char *name);
+  const char *name,
+  bool packed = false);
 
/**
 * Get the instance of an interface block type
@@ -867,7 +875,7 @@ private:
 
/** Constructor for record types */
glsl_type(const glsl_struct_field *fields, unsigned num_fields,
-const char *name);
+const char *name, bool packed = false);
 
/** Constructor for interface types */
glsl_type(const glsl_struct_field *fields, unsigned num_fields,
diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp
index ee6b06aea63..76a9cf2fde7 100644
--- a/src/compiler/nir_types.cpp
+++ b/sr

[Mesa-dev] [PATCH v3 07/19] nir/vtn: print extension name in fail msg

2018-03-23 Thread Karol Herbst
From: Rob Clark <robdcl...@gmail.com>

Signed-off-by: Rob Clark <robdcl...@gmail.com>
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/spirv/spirv_to_nir.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index d58a68f80ef..3acb3fc0b42 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -370,16 +370,17 @@ static void
 vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
  const uint32_t *w, unsigned count)
 {
+   const char *ext = (const char *)&w[2];
switch (opcode) {
case SpvOpExtInstImport: {
   struct vtn_value *val = vtn_push_value(b, w[1], 
vtn_value_type_extension);
-  if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) {
+  if (strcmp(ext, "GLSL.std.450") == 0) {
  val->ext_handler = vtn_handle_glsl450_instruction;
   } else if ((strcmp((const char *)&w[2], "SPV_AMD_gcn_shader") == 0)
 && (b->options && b->options->caps.gcn_shader)) {
  val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
   } else {
- vtn_fail("Unsupported extension");
+ vtn_fail("Unsupported extension: %s", ext);
   }
   break;
}
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 13/19] nir/vtn: Handle OpInBoundsPtrAccessChain

2018-03-23 Thread Karol Herbst
From: Rob Clark <robdcl...@gmail.com>

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/spirv/spirv_to_nir.c  | 1 +
 src/compiler/spirv/vtn_variables.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 6a16d77a771..3b86aef2978 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -3732,6 +3732,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp 
opcode,
case SpvOpCopyMemorySized:
case SpvOpAccessChain:
case SpvOpPtrAccessChain:
+   case SpvOpInBoundsPtrAccessChain:
case SpvOpInBoundsAccessChain:
case SpvOpArrayLength:
   vtn_handle_variables(b, opcode, w, count);
diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 51f73b3cf8c..6cf1a63f8c9 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -2259,6 +2259,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
 
case SpvOpAccessChain:
case SpvOpPtrAccessChain:
+   case SpvOpInBoundsPtrAccessChain:
case SpvOpInBoundsAccessChain: {
   struct vtn_type *ptr_type = vtn_value(b, w[1], 
vtn_value_type_type)->type;
   struct vtn_value *base_val = vtn_untyped_value(b, w[3]);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 11/19] nir/vtn: pointers can point to cross_workgroup or local memory as well

2018-03-23 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/spirv/vtn_variables.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 80fca6e8a32..51f73b3cf8c 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1917,7 +1917,9 @@ vtn_pointer_to_ssa(struct vtn_builder *b, struct 
vtn_pointer *ptr)
  ptr->mode == vtn_variable_mode_ssbo);
   return nir_vec2(&b->nb, ptr->block_index, ptr->offset);
} else {
-  vtn_assert(ptr->mode == vtn_variable_mode_workgroup);
+  vtn_assert(ptr->mode == vtn_variable_mode_workgroup ||
+ ptr->mode == vtn_variable_mode_cross_workgroup ||
+ ptr->mode == vtn_variable_mode_local);
   return ptr->offset;
}
 }
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 02/19] vtn: handle SpvExecutionModelKernel

2018-03-23 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/spirv/spirv_to_nir.c | 3 +++
 src/compiler/spirv/vtn_private.h  | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 7ce7e9ba62e..edf02db584b 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -3178,6 +3178,9 @@ stage_for_execution_model(struct vtn_builder *b, 
SpvExecutionModel model)
   return MESA_SHADER_FRAGMENT;
case SpvExecutionModelGLCompute:
   return MESA_SHADER_COMPUTE;
+   case SpvExecutionModelKernel:
+  b->kernel_mode = true;
+  return MESA_SHADER_COMPUTE;
default:
   vtn_fail("Unsupported execution model");
}
diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index 70f660fbd48..9f5a22905f1 100644
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -580,6 +580,8 @@ struct vtn_builder {
unsigned func_param_idx;
 
bool has_loop_continue;
+
+   bool kernel_mode;
 };
 
 nir_ssa_def *
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 04/19] glsl: add glsl_base_get_byte_size

2018-03-23 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/glsl_types.h | 34 ++
 src/compiler/nir_types.h  | 30 +-
 2 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index 34d03505ae8..2e63261090e 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -1068,4 +1068,38 @@ glsl_align(unsigned int a, unsigned int align)
return (a + align - 1) / align * align;
 }
 
+static inline unsigned
+glsl_base_get_byte_size(const enum glsl_base_type base_type)
+{
+   switch (base_type) {
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_FLOAT: /* TODO handle mediump */
+   case GLSL_TYPE_SUBROUTINE:
+  return 4;
+
+   case GLSL_TYPE_FLOAT16:
+   case GLSL_TYPE_UINT16:
+   case GLSL_TYPE_INT16:
+  return 2;
+
+   case GLSL_TYPE_UINT8:
+   case GLSL_TYPE_INT8:
+  return 1;
+
+   case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_INT64:
+   case GLSL_TYPE_UINT64:
+   case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_SAMPLER:
+  return 8;
+
+   default:
+  unreachable("unknown base type");
+   }
+
+   return 0;
+}
+
 #endif /* GLSL_TYPES_H */
diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h
index 8687d4f1336..033b3ae739b 100644
--- a/src/compiler/nir_types.h
+++ b/src/compiler/nir_types.h
@@ -86,35 +86,7 @@ unsigned glsl_get_record_location_offset(const struct 
glsl_type *type,
 static inline unsigned
 glsl_get_bit_size(const struct glsl_type *type)
 {
-   switch (glsl_get_base_type(type)) {
-   case GLSL_TYPE_INT:
-   case GLSL_TYPE_UINT:
-   case GLSL_TYPE_BOOL:
-   case GLSL_TYPE_FLOAT: /* TODO handle mediump */
-   case GLSL_TYPE_SUBROUTINE:
-  return 32;
-
-   case GLSL_TYPE_FLOAT16:
-   case GLSL_TYPE_UINT16:
-   case GLSL_TYPE_INT16:
-  return 16;
-
-   case GLSL_TYPE_UINT8:
-   case GLSL_TYPE_INT8:
-  return 8;
-
-   case GLSL_TYPE_DOUBLE:
-   case GLSL_TYPE_INT64:
-   case GLSL_TYPE_UINT64:
-   case GLSL_TYPE_IMAGE:
-   case GLSL_TYPE_SAMPLER:
-  return 64;
-
-   default:
-  unreachable("unknown base type");
-   }
-
-   return 0;
+   return glsl_base_get_byte_size(glsl_get_base_type(type)) * 8;
 }
 
 bool glsl_type_is_64bit(const struct glsl_type *type);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 01/19] nir: add load/store_global intrinsics

2018-03-23 Thread Karol Herbst
OpenCL kernels have raw pointers to global memory, so we need
instructions to load/store in order to dereference these pointers.
In some ways similar to other load/store intrinsics, but rather
than taking an offset as a src argument, they take a raw pointer
value (which can be 32b or 64b depending on the memory model).

Signed-off-by: Rob Clark <robdcl...@gmail.com>
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/nir/nir_intrinsics.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 7b737559d5a..6597eaea87b 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -511,6 +511,8 @@ LOAD(shared, 1, 1, BASE, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { offset }. const_index[] = { base, range } */
 LOAD(push_constant, 1, 2, BASE, RANGE, xx,
  NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { address }. No const_index */
+LOAD(global, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 
 /*
  * Stores work the same way as loads, except now the first source is the value
@@ -532,8 +534,10 @@ STORE(per_vertex_output, 3, 3, BASE, WRMASK, COMPONENT, 0)
 STORE(ssbo, 3, 1, WRMASK, xx, xx, 0)
 /* src[] = { value, offset }. const_index[] = { base, write_mask } */
 STORE(shared, 2, 2, BASE, WRMASK, xx, 0)
+/* src[] = { value, address }. const_index[] = { write_mask } */
+STORE(global, 2, 1, WRMASK, xx, xx, 0)
 
-LAST_INTRINSIC(store_shared)
+LAST_INTRINSIC(store_global)
 
 #undef DEFINE_SYSTEM_VALUE
 #undef INTRINSIC
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 05/19] RFC glsl: add cl_size and cl_alignment

2018-03-23 Thread Karol Herbst
v2: fix cl_size for arrays_of_arrays

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/compiler/glsl_types.cpp | 48 +
 src/compiler/glsl_types.h   | 10 ++
 src/compiler/nir_types.cpp  | 12 
 src/compiler/nir_types.h|  4 
 4 files changed, 74 insertions(+)

diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 12a8a546938..2bf44c6fc30 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -2360,3 +2360,51 @@ decode_type_from_blob(struct blob_reader *blob)
   return NULL;
}
 }
+
+unsigned
+glsl_type::cl_alignment() const
+{
+   /* vectors unlike arrays are aligned to their size */
+   if (this->is_scalar() || this->is_vector())
+  return this->cl_size();
+   else if (this->is_array())
+  return this->without_array()->cl_alignment();
+   else if (this->is_record()) {
+  /* Packed Structs are 0x1 aligned despite their size. */
+  if (this->packed)
+ return 1;
+
+  unsigned res = 1;
+  for (unsigned i = 0; i < this->length; ++i) {
+ struct glsl_struct_field &field = this->fields.structure[i];
+ res = MAX2(res, field.type->cl_alignment());
+  }
+  return res;
+   }
+   return 1;
+}
+
+unsigned
+glsl_type::cl_size() const
+{
+   if (this->is_scalar()) {
+  return glsl_base_get_byte_size(this->base_type);
+   } else if (this->is_vector()) {
+  unsigned vec_elemns = this->vector_elements == 3 ? 4 : 
this->vector_elements;
+  return vec_elemns * glsl_base_get_byte_size(this->base_type);
+   } else if (this->is_array()) {
+  unsigned size = this->without_array()->cl_size();
+  return size * this->arrays_of_arrays_size();
+   } else if (this->is_record()) {
+  unsigned size = 0;
+  for (unsigned i = 0; i < this->length; ++i) {
+ struct glsl_struct_field &field = this->fields.structure[i];
+ /* if a struct is packed, members don't get aligned */
+ if (!this->packed)
+size = align(size, field.type->cl_alignment());
+ size += field.type->cl_size();
+  }
+  return size;
+   }
+   return 1;
+}
diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index 2e63261090e..6fb5d3c7881 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -408,6 +408,16 @@ public:
 */
unsigned std430_size(bool row_major) const;
 
+   /**
+* Alignment in bytes of the start of this type in OpenCL memory.
+*/
+   unsigned cl_alignment() const;
+
+   /**
+* Size in bytes of this type in OpenCL memory
+*/
+   unsigned cl_size() const;
+
/**
 * \brief Can this type be implicitly converted to another?
 *
diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp
index 76a9cf2fde7..ce4ace82c1c 100644
--- a/src/compiler/nir_types.cpp
+++ b/src/compiler/nir_types.cpp
@@ -463,3 +463,15 @@ glsl_channel_type(const glsl_type *t)
   unreachable("Unhandled base type glsl_channel_type()");
}
 }
+
+unsigned
+glsl_get_cl_size(const struct glsl_type *type)
+{
+   return type->cl_size();
+}
+
+unsigned
+glsl_get_cl_alignment(const struct glsl_type *type)
+{
+   return type->cl_alignment();
+}
diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h
index 033b3ae739b..47239a6b7ce 100644
--- a/src/compiler/nir_types.h
+++ b/src/compiler/nir_types.h
@@ -83,6 +83,10 @@ enum glsl_base_type glsl_get_sampler_result_type(const 
struct glsl_type *type);
 unsigned glsl_get_record_location_offset(const struct glsl_type *type,
  unsigned length);
 
+unsigned glsl_get_cl_size(const struct glsl_type *type);
+
+unsigned glsl_get_cl_alignment(const struct glsl_type *type);
+
 static inline unsigned
 glsl_get_bit_size(const struct glsl_type *type)
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 00/19] nir/vtn/compiler: first batch of compute support

2018-03-23 Thread Karol Herbst
second series here:
https://lists.freedesktop.org/archives/mesa-dev/2018-March/188218.html

The main difference from the last series is that I tried to focus on the real core
parts we need to get basic OpenCL support in spirv_to_nir, so that we can run
more or less complex examples.

There are some important core NIR changes and somebody should take a closer
look at those.

Karol Herbst (12):
  nir: add load/store_global intrinsics
  vtn: handle SpvExecutionModelKernel
  glsl: add packed for struct types
  glsl: add glsl_base_get_byte_size
  RFC glsl: add cl_size and cl_alignment
  RFC: nir/vtn: handle constant builtins from kernels
  nir/vtn: pointers can point to cross_workgroup or local memory as well
  nir: specify bit_size when loading system values
  nir/vtn/opencl: support fma
  nir: add load_kernel_param
  RFC nir/lower_io: lower kernel entry param load_vars to
load_kernel_param
  RFC: nir/vtn: member in struct deref

Rob Clark (7):
  RFC: nir/vtn: "raw" pointer support
  nir/vtn: print extension name in fail msg
  nir/vtn: import OpenCL.std.h
  nir/vtn: initial OpenCL.std extension
  nir/vtn: Handle OpInBoundsPtrAccessChain
  nir: use load_local_group_size
  nir: kernel entrypoints can have arguments

 src/compiler/glsl_types.cpp  |  65 -
 src/compiler/glsl_types.h|  56 +++-
 src/compiler/nir/meson.build |   1 +
 src/compiler/nir/nir.h   |   1 -
 src/compiler/nir/nir_builder.h   |  10 +-
 src/compiler/nir/nir_intrinsics.h|   8 +-
 src/compiler/nir/nir_lower_alpha_test.c  |   2 +-
 src/compiler/nir/nir_lower_clip.c|   3 +-
 src/compiler/nir/nir_lower_io.c  |  39 ++-
 src/compiler/nir/nir_lower_subgroups.c   |   8 +-
 src/compiler/nir/nir_lower_system_values.c   |  48 ++--
 src/compiler/nir/nir_lower_two_sided_color.c |   2 +-
 src/compiler/nir/nir_lower_wpos_center.c |   2 +-
 src/compiler/nir/nir_opcodes.py  |   3 +-
 src/compiler/nir_types.cpp   |  17 +-
 src/compiler/nir_types.h |  37 +--
 src/compiler/spirv/OpenCL.std.h  | 211 +++
 src/compiler/spirv/spirv_to_nir.c| 106 ++--
 src/compiler/spirv/vtn_opencl.c  | 268 +++
 src/compiler/spirv/vtn_private.h |  35 ++-
 src/compiler/spirv/vtn_subgroup.c|   2 +-
 src/compiler/spirv/vtn_variables.c   | 313 +++
 src/gallium/auxiliary/nir/tgsi_to_nir.c  |   3 +-
 src/intel/blorp/blorp_blit.c |   2 +-
 src/intel/blorp/blorp_clear.c|   2 +-
 src/intel/compiler/brw_nir_lower_cs_intrinsics.c |   6 +-
 src/mesa/drivers/dri/i965/brw_tcs.c  |   2 +-
 27 files changed, 1099 insertions(+), 153 deletions(-)
 create mode 100644 src/compiler/spirv/OpenCL.std.h
 create mode 100644 src/compiler/spirv/vtn_opencl.c

-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 28/34] nvir/nir: implement geometry shader nir_intrinsics

2018-03-19 Thread Karol Herbst
v4: use smarter getIndirect helper
use new getSlotAddress helper
use loadFrom helper

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 27 ++
 1 file changed, 27 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index e73f4ae1e36..46b2f3e5770 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -412,6 +412,10 @@ operation
 Converter::getOperation(nir_intrinsic_op op)
 {
switch (op) {
+   case nir_intrinsic_emit_vertex:
+  return OP_EMIT;
+   case nir_intrinsic_end_primitive:
+  return OP_RESTART;
default:
   ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
   assert(false);
@@ -1864,6 +1868,29 @@ Converter::visit(nir_intrinsic_instr *insn)
  ->subOp = NV50_IR_SUBOP_SHFL_IDX;
   break;
}
+   case nir_intrinsic_load_per_vertex_input: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectVertex;
+  Value *indirectOffset;
+  auto baseVertex = getIndirect(&insn->src[0], 0, &indirectVertex);
+  auto idx = getIndirect(insn, 1, 0, &indirectOffset);
+
+  Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
+  mkImm(baseVertex), indirectVertex);
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
+  indirectOffset, vtxBase, info->in[idx].patch);
+  }
+  break;
+   }
+   case nir_intrinsic_emit_vertex:
+   case nir_intrinsic_end_primitive: {
+  auto idx = nir_intrinsic_stream_id(insn);
+  mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 29/34] nvir/nir: implement nir_intrinsic_load_ubo

2018-03-19 Thread Karol Herbst
v4: use loadFrom helper

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 46b2f3e5770..c415fa71738 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1891,6 +1891,20 @@ Converter::visit(nir_intrinsic_instr *insn)
   mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
   break;
}
+   case nir_intrinsic_load_ubo: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectIndex;
+  Value *indirectOffset;
+  uint32_t index = getIndirect(&insn->src[0], 0, &indirectIndex) + 1;
+  uint32_t offset = getIndirect(&insn->src[1], 0, &indirectOffset);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
+  indirectOffset, indirectIndex);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 31/34] nvir/nir: implement images

2018-03-19 Thread Karol Herbst
v3: fix compiler warnings
v4: use loadFrom helper
v5: fix signed min/max
v6: set tex mask
add support for indirect image access
set cache mode

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 395 +++--
 1 file changed, 375 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index e15f1734cc3..5c3fde32601 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -91,6 +91,8 @@ private:
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
 
+   ImgFormat convertGLImgFormat(GLuint);
+
Value* getSrc(nir_alu_src *, uint8_t component = 0);
Value* getSrc(nir_register *, uint8_t);
Value* getSrc(nir_src *, uint8_t, bool indirect = false);
@@ -120,6 +122,7 @@ private:
 
DataType getDType(nir_alu_instr*);
DataType getDType(nir_intrinsic_instr*);
+   DataType getDType(nir_intrinsic_instr*, bool isSigned);
DataType getDType(nir_op, NirSSADefBitSize);
 
std::vector<DataType> getSTypes(nir_alu_instr*);
@@ -153,6 +156,11 @@ private:
 
/* tex stuff */
Value* applyProjection(Value *src, Value *proj);
+   unsigned int getNIRArgCount(TexInstruction::Target&);
+
+   /* image stuff */
+   uint16_t derefImageVar(nir_deref_var *, Value **indirect);
+   CacheMode getCacheModeFromVar(nir_variable *);
 
nir_shader *nir;
 
@@ -244,11 +252,30 @@ Converter::getDType(nir_alu_instr *insn)
 
 DataType
 Converter::getDType(nir_intrinsic_instr *insn)
+{
+   bool isSigned;
+   switch (insn->intrinsic) {
+   case nir_intrinsic_shared_atomic_imax:
+   case nir_intrinsic_shared_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_imin:
+  isSigned = true;
+  break;
+   default:
+  isSigned = false;
+  break;
+   }
+
+   return getDType(insn, isSigned);
+}
+
+DataType
+Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
 {
if (insn->dest.is_ssa)
-  return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
+  return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
else
-  return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
+  return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
 }
 
 DataType
@@ -445,28 +472,31 @@ Converter::getSubOp(nir_op op)
}
 }
 
+#define CASE_OP_INTR_ATOM(nir, nvir) \
+   case nir_intrinsic_image_atomic_ ## nir : \
+   case nir_intrinsic_shared_atomic_ ## nir : \
+   case nir_intrinsic_ssbo_atomic_ ## nir : \
+  return NV50_IR_SUBOP_ATOM_ ## nvir
+#define CASE_OP_INTR_ATOM_S(nir, nvir) \
+   case nir_intrinsic_shared_atomic_ ## nir : \
+   case nir_intrinsic_ssbo_atomic_ ## nir : \
+  return NV50_IR_SUBOP_ATOM_ ## nvir
 int
 Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
-   case nir_intrinsic_ssbo_atomic_add:
-  return NV50_IR_SUBOP_ATOM_ADD;
-   case nir_intrinsic_ssbo_atomic_and:
-  return NV50_IR_SUBOP_ATOM_AND;
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-  return NV50_IR_SUBOP_ATOM_CAS;
-   case nir_intrinsic_ssbo_atomic_exchange:
-  return NV50_IR_SUBOP_ATOM_EXCH;
-   case nir_intrinsic_ssbo_atomic_or:
-  return NV50_IR_SUBOP_ATOM_OR;
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_ssbo_atomic_umax:
-  return NV50_IR_SUBOP_ATOM_MAX;
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_ssbo_atomic_umin:
-  return NV50_IR_SUBOP_ATOM_MIN;
-   case nir_intrinsic_ssbo_atomic_xor:
-  return NV50_IR_SUBOP_ATOM_XOR;
+   CASE_OP_INTR_ATOM(add, ADD);
+   CASE_OP_INTR_ATOM(and, AND);
+   CASE_OP_INTR_ATOM(comp_swap, CAS);
+   CASE_OP_INTR_ATOM(exchange, EXCH);
+   CASE_OP_INTR_ATOM(or, OR);
+   case nir_intrinsic_image_atomic_max:
+   CASE_OP_INTR_ATOM_S(imax, MAX);
+   CASE_OP_INTR_ATOM_S(umax, MAX);
+   case nir_intrinsic_image_atomic_min:
+   CASE_OP_INTR_ATOM_S(imin, MIN);
+   CASE_OP_INTR_ATOM_S(umin, MIN);
+   CASE_OP_INTR_ATOM(xor, XOR);
case nir_intrinsic_vote_all:
   return NV50_IR_SUBOP_VOTE_ALL;
case nir_intrinsic_vote_any:
@@ -479,6 +509,8 @@ Converter::getSubOp(nir_intrinsic_op op)
   return 0;
}
 }
+#undef CASE_OP_INTR_ATOM
+#undef CASE_OP_INTR_ATOM_S
 
 CondCode
 Converter::getCondCode(nir_op op)
@@ -1628,6 +1660,68 @@ Converter::convert(nir_intrinsic_op intr)
}
 }
 
+ImgFormat
+Converter::convertGLImgFormat(GLuint format)
+{
+#define FMT_CASE(a, b) \
+  case GL_ ## a: return nv50_ir::FMT_ ## b
+
+   switch (format) {
+   FMT_CASE(NONE, NONE);
+
+   FMT_CASE(RGBA32F, RGBA32F);
+   FMT_CASE(RGBA16F, RGBA16F);
+   FMT_CASE(RG32F, RG32F);
+   FMT_CASE(RG16F, RG16F);
+   FMT_CASE(R11F_G11F_B10F, R11G11B10F);
+   FMT_CASE(R32F, R32F);
+   FMT_CASE(R16F, R16F);
+
+   FMT_CASE(RGBA32UI, RGBA32UI);
+   FMT_CASE(RG

[Mesa-dev] [PATCH v6 26/34] nvir/nir: implement vote and ballot

2018-03-19 Thread Karol Herbst
v2: add vote_eq support
use the new subop intrinsic helper
add ballot
v3: add read_(first_)invocation

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 42 ++
 1 file changed, 42 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index ebf6a5ceb5c..f594e299645 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -443,6 +443,12 @@ int
 Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
+   case nir_intrinsic_vote_all:
+  return NV50_IR_SUBOP_VOTE_ALL;
+   case nir_intrinsic_vote_any:
+  return NV50_IR_SUBOP_VOTE_ANY;
+   case nir_intrinsic_vote_ieq:
+  return NV50_IR_SUBOP_VOTE_UNI;
default:
   ERROR("couldn't get subop for nir_intrinsic_op %u\n", op);
   assert(false);
@@ -1809,6 +1815,42 @@ Converter::visit(nir_intrinsic_instr *insn)
   loadImm(newDefs[0], 32u);
   break;
}
+   case nir_intrinsic_vote_all:
+   case nir_intrinsic_vote_any:
+   case nir_intrinsic_vote_ieq: {
+  LValues &newDefs = convert(&insn->dest);
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), 
zero);
+  mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
+  mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
+  break;
+   }
+   case nir_intrinsic_ballot: {
+  LValues &newDefs = convert(&insn->dest);
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), 
zero);
+  Instruction *ballot = mkOp1(OP_VOTE, TYPE_U32, getSSA(), pred);
+  ballot->subOp = NV50_IR_SUBOP_VOTE_ANY;
+  mkOp2(OP_MERGE, TYPE_U64, newDefs[0], ballot->getDef(0), 
loadImm(getSSA(), 0));
+  break;
+   }
+   case nir_intrinsic_read_first_invocation:
+   case nir_intrinsic_read_invocation: {
+  LValues &newDefs = convert(&insn->dest);
+  const DataType dType = getDType(insn);
+  Value *tmp = getScratch();
+
+  if (op == nir_intrinsic_read_first_invocation) {
+ mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = 
NV50_IR_SUBOP_VOTE_ANY;
+ mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = 
NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+  } else
+ tmp = getSrc(&insn->src[1], 0);
+
+  mkOp3(OP_SHFL, dType, newDefs[0], getSrc(&insn->src[0], 0), tmp, 
mkImm(0x1f))
+ ->subOp = NV50_IR_SUBOP_SHFL_IDX;
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 34/34] nvir/nir: implement intrinsic shader_clock

2018-03-19 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 19086157baa..2f831bfe487 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -2329,6 +2329,14 @@ Converter::visit(nir_intrinsic_instr *insn)
   bar->subOp = getSubOp(op);
   break;
}
+   case nir_intrinsic_shader_clock: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+
+  loadImm(newDefs[0], 0u);
+  mkOp1v(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0));
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 23/34] nvir/nir: implement nir_ssa_undef_instr

2018-03-19 Thread Karol Herbst
v2: use mkOp

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 43a15fe0a76..6a43b764601 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -143,6 +143,7 @@ private:
bool visit(nir_jump_instr *);
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
+   bool visit(nir_ssa_undef_instr *);
 
nir_shader *nir;
 
@@ -1451,6 +1452,8 @@ Converter::visit(nir_instr *insn)
   return visit(nir_instr_as_jump(insn));
case nir_instr_type_load_const:
   return visit(nir_instr_as_load_const(insn));
+   case nir_instr_type_ssa_undef:
+  return visit(nir_instr_as_ssa_undef(insn));
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -2111,6 +2114,16 @@ Converter::visit(nir_alu_instr *insn)
 }
 #undef DEFAULT_CHECKS
 
+bool
+Converter::visit(nir_ssa_undef_instr *insn)
+{
+   LValues &newDefs = convert(&insn->def);
+   for (auto i = 0u; i < insn->def.num_components; ++i) {
+  mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
+   }
+   return true;
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 33/34] nvir/nir: implement load_per_vertex_output

2018-03-19 Thread Karol Herbst
v4: use smarter getIndirect helper
use new getSlotAddress helper
v5: use loadFrom helper

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 23 ++
 1 file changed, 23 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index bc0127bef84..19086157baa 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -2005,6 +2005,29 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_load_per_vertex_output: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectVertex;
+  Value *indirectOffset;
+  auto baseVertex = getIndirect(&insn->src[0], 0, &indirectVertex);
+  auto idx = getIndirect(insn, 1, 0, &indirectOffset);
+  Value *vtxBase = nullptr;
+
+  if (indirectVertex)
+ vtxBase = indirectVertex;
+  else
+ vtxBase = loadImm(nullptr, baseVertex);
+
+  vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, 
vtxBase);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
+  indirectOffset, vtxBase, info->in[idx].patch);
+  }
+  break;
+   }
case nir_intrinsic_emit_vertex:
case nir_intrinsic_end_primitive: {
   auto idx = nir_intrinsic_stream_id(insn);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 32/34] nvir/nir: add memory barriers

2018-03-19 Thread Karol Herbst
v5: add more barrier intrinsics

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp  | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 5c3fde32601..bc0127bef84 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -489,6 +489,14 @@ Converter::getSubOp(nir_intrinsic_op op)
CASE_OP_INTR_ATOM(and, AND);
CASE_OP_INTR_ATOM(comp_swap, CAS);
CASE_OP_INTR_ATOM(exchange, EXCH);
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+  return NV50_IR_SUBOP_MEMBAR(M, GL);
+   case nir_intrinsic_memory_barrier_shared:
+  return NV50_IR_SUBOP_MEMBAR(M, CTA);
CASE_OP_INTR_ATOM(or, OR);
case nir_intrinsic_image_atomic_max:
CASE_OP_INTR_ATOM_S(imax, MAX);
@@ -2287,6 +2295,17 @@ Converter::visit(nir_intrinsic_instr *insn)
   bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
   break;
}
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+   case nir_intrinsic_memory_barrier_shared: {
+  Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
+  bar->fixed = 1;
+  bar->subOp = getSubOp(op);
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 27/34] nvir/nir: implement variable indexing

2018-03-19 Thread Karol Herbst
we store those arrays in local memory and reserve some space for each of the
arrays. The arrays are stored in a packed format, because we know quite easily
the context of each index. We don't do that in TGSI so far.

This causes various issues to come up in the MemoryOpt pass, because ld/st with
indirects aren't guaranteed to be aligned to 0x10 anymore.

v3: use fixed size vec4 arrays until we fix MemoryOpt
v4: fix for 64 bit types
v5: use loadFrom helper

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 58 ++
 1 file changed, 58 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index f594e299645..e73f4ae1e36 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -80,6 +80,7 @@ private:
typedef decltype(nir_ssa_def().index) NirSSADefIdx;
typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize;
typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap;
+   typedef std::unordered_map<NirSSADefIdx, uint32_t> NirArrayLMemOffsets;
typedef std::unordered_map<decltype(nir_block().index), BasicBlock*> 
NirBlockMap;
 
TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
@@ -157,6 +158,7 @@ private:
 
NirDefMap ssaDefs;
NirDefMap regDefs;
+   NirArrayLMemOffsets regToLmemOffset;
NirBlockMap blocks;
unsigned int curLoopDepth;
 
@@ -1259,6 +1261,7 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile 
file, operation op,
 bool
 Converter::parseNIR()
 {
+   info->bin.tlsSpace = 0;
info->io.clipDistances = nir->info.clip_distance_array_size;
info->io.cullDistances = nir->info.cull_distance_array_size;
 
@@ -1348,6 +1351,16 @@ Converter::visit(nir_function *function)
   break;
}
 
+   nir_foreach_register(reg, &function->impl->registers) {
+  if (reg->num_array_elems) {
+ /* TODO: packed variables would be nice, but MemoryOpt fails */
+ /* replace 4 with reg->num_components */
+ uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
+ regToLmemOffset[reg->index] = info->bin.tlsSpace;
+ info->bin.tlsSpace += size;
+  }
+   }
+
nir_index_ssa_defs(function->impl);
foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
   if (!visit(node))
@@ -2035,6 +2048,51 @@ Converter::visit(nir_alu_instr *insn)
 *   2. they basically just merge multiple values into one data type
 */
CASE_OPFI(mov):
+  if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) 
{
+ nir_reg_dest& reg = insn->dest.dest.reg;
+ auto goffset = regToLmemOffset[reg.reg->index];
+ auto comps = reg.reg->num_components;
+ auto size = reg.reg->bit_size / 8;
+ auto csize = 4 * size; /* TODO after fixing MemoryOpts: comps * size; 
*/
+ auto aoffset = csize * reg.base_offset;
+ Value *indirect = nullptr;
+
+ if (reg.indirect)
+indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
+  getSrc(reg.indirect, 0), mkImm(csize));
+
+ for (auto i = 0u; i < comps; ++i) {
+if (!((1u << i) & insn->dest.write_mask))
+   continue;
+
+Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + 
aoffset + i * size);
+mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
+ }
+ break;
+  } else if (!insn->src[0].src.is_ssa && 
insn->src[0].src.reg.reg->num_array_elems) {
+ LValues &newDefs = convert(&insn->dest);
+ nir_reg_src& reg = insn->src[0].src.reg;
+ auto goffset = regToLmemOffset[reg.reg->index];
+ /* auto comps = reg.reg->num_components; */
+ auto size = reg.reg->bit_size / 8;
+ auto csize = 4 * size; /* TODO after fixing MemoryOpts: comps * size; 
*/
+ auto aoffset = csize * reg.base_offset;
+ Value *indirect = nullptr;
+
+ if (reg.indirect)
+indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), 
getSrc(reg.indirect, 0), mkImm(csize));
+
+ for (auto i = 0u; i < newDefs.size(); ++i)
+loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + 
aoffset, i, indirect);
+
+ break;
+  } else {
+ LValues &newDefs = convert(&insn->dest);
+ for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
+mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
+ }
+  }
+  break;
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4: {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 30/34] nvir/nir: implement ssbo intrinsics

2018-03-19 Thread Karol Herbst
v4: use loadFrom helper
v5: support indirect buffer access

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 90 ++
 1 file changed, 90 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index c415fa71738..e15f1734cc3 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -449,6 +449,24 @@ int
 Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
+   case nir_intrinsic_ssbo_atomic_add:
+  return NV50_IR_SUBOP_ATOM_ADD;
+   case nir_intrinsic_ssbo_atomic_and:
+  return NV50_IR_SUBOP_ATOM_AND;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+  return NV50_IR_SUBOP_ATOM_CAS;
+   case nir_intrinsic_ssbo_atomic_exchange:
+  return NV50_IR_SUBOP_ATOM_EXCH;
+   case nir_intrinsic_ssbo_atomic_or:
+  return NV50_IR_SUBOP_ATOM_OR;
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_umax:
+  return NV50_IR_SUBOP_ATOM_MAX;
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umin:
+  return NV50_IR_SUBOP_ATOM_MIN;
+   case nir_intrinsic_ssbo_atomic_xor:
+  return NV50_IR_SUBOP_ATOM_XOR;
case nir_intrinsic_vote_all:
   return NV50_IR_SUBOP_VOTE_ALL;
case nir_intrinsic_vote_any:
@@ -1905,6 +1923,78 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_get_buffer_size: {
+  LValues &newDefs = convert(&insn->dest);
+  const DataType dType = getDType(insn);
+  Value *indirectBuffer;
+  uint32_t buffer = getIndirect(&insn->src[0], 0, &indirectBuffer);
+
+  Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
+  mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, 
indirectBuffer);
+  break;
+   }
+   case nir_intrinsic_store_ssbo: {
+  DataType sType = getSType(insn->src[0], false, false);
+  Value *indirectBuffer;
+  Value *indirectOffset;
+  uint32_t buffer = getIndirect(&insn->src[1], 0, &indirectBuffer);
+  uint32_t offset = getIndirect(&insn->src[2], 0, &indirectOffset);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+continue;
+ Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
+offset + i * typeSizeof(sType));
+ mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], 
i))
+->setIndirect(0, 1, indirectBuffer);
+  }
+  info->io.globalAccess |= 0x2;
+  break;
+   }
+   case nir_intrinsic_load_ssbo: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectBuffer;
+  Value *indirectOffset;
+  uint32_t buffer = getIndirect(&insn->src[0], 0, &indirectBuffer);
+  uint32_t offset = getIndirect(&insn->src[1], 0, &indirectOffset);
+
+  for (auto i = 0u; i < insn->num_components; ++i)
+ loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
+  indirectOffset, indirectBuffer);
+
+  info->io.globalAccess |= 0x1;
+  break;
+   }
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_xor: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectBuffer;
+  Value *indirectOffset;
+  uint32_t buffer = getIndirect(&insn->src[0], 0, &indirectBuffer);
+  uint32_t offset = getIndirect(&insn->src[1], 0, &indirectOffset);
+
+  Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
+  Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
+getSrc(&insn->src[2], 0));
+  if (op == nir_intrinsic_ssbo_atomic_comp_swap)
+ atom->setSrc(2, getSrc(&insn->src[3], 0));
+  atom->setIndirect(0, 0, indirectOffset);
+  atom->setIndirect(0, 1, indirectBuffer);
+  atom->subOp = getSubOp(op);
+
+  info->io.globalAccess |= 0x2;
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 22/34] nvir/nir: implement loading system values

2018-03-19 Thread Karol Herbst
v2: support more sys values
fixed a bug where for multi component reads all values ended up in x
v3: add load_patch_vertices_in
v4: add subgroup stuff
v5: add helper invocation
v6: fix loading 64 bit system values

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 122 +
 1 file changed, 122 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 2bd40e00db9..43a15fe0a76 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -85,6 +85,7 @@ private:
LValues& convert(nir_alu_dest *);
BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
+   SVSemantic convert(nir_intrinsic_op);
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
 
@@ -1457,6 +1458,70 @@ Converter::visit(nir_instr *insn)
return true;
 }
 
+SVSemantic
+Converter::convert(nir_intrinsic_op intr)
+{
+   switch (intr) {
+   case nir_intrinsic_load_base_vertex:
+  return SV_BASEVERTEX;
+   case nir_intrinsic_load_base_instance:
+  return SV_BASEINSTANCE;
+   case nir_intrinsic_load_draw_id:
+  return SV_DRAWID;
+   case nir_intrinsic_load_front_face:
+  return SV_FACE;
+   case nir_intrinsic_load_helper_invocation:
+  return SV_THREAD_KILL;
+   case nir_intrinsic_load_instance_id:
+  return SV_INSTANCE_ID;
+   case nir_intrinsic_load_invocation_id:
+  return SV_INVOCATION_ID;
+   case nir_intrinsic_load_local_group_size:
+  return SV_NTID;
+   case nir_intrinsic_load_local_invocation_id:
+  return SV_TID;
+   case nir_intrinsic_load_num_work_groups:
+  return SV_NCTAID;
+   case nir_intrinsic_load_patch_vertices_in:
+  return SV_VERTEX_COUNT;
+   case nir_intrinsic_load_primitive_id:
+  return SV_PRIMITIVE_ID;
+   case nir_intrinsic_load_sample_id:
+  return SV_SAMPLE_INDEX;
+   case nir_intrinsic_load_sample_mask_in:
+  return SV_SAMPLE_MASK;
+   case nir_intrinsic_load_sample_pos:
+  return SV_SAMPLE_POS;
+   case nir_intrinsic_load_subgroup_eq_mask:
+  return SV_LANEMASK_EQ;
+   case nir_intrinsic_load_subgroup_ge_mask:
+  return SV_LANEMASK_GE;
+   case nir_intrinsic_load_subgroup_gt_mask:
+  return SV_LANEMASK_GT;
+   case nir_intrinsic_load_subgroup_le_mask:
+  return SV_LANEMASK_LE;
+   case nir_intrinsic_load_subgroup_lt_mask:
+  return SV_LANEMASK_LT;
+   case nir_intrinsic_load_subgroup_invocation:
+  return SV_LANEID;
+   case nir_intrinsic_load_tess_coord:
+  return SV_TESS_COORD;
+   case nir_intrinsic_load_tess_level_inner:
+  return SV_TESS_INNER;
+   case nir_intrinsic_load_tess_level_outer:
+  return SV_TESS_OUTER;
+   case nir_intrinsic_load_vertex_id:
+  return SV_VERTEX_ID;
+   case nir_intrinsic_load_work_group_id:
+  return SV_CTAID;
+   default:
+  ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
+nir_intrinsic_infos[intr].name);
+  assert(false);
+  return SV_LAST;
+   }
+}
+
 bool
 Converter::visit(nir_intrinsic_instr *insn)
 {
@@ -1622,6 +1687,63 @@ Converter::visit(nir_intrinsic_instr *insn)
   mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
   break;
}
+   case nir_intrinsic_load_base_vertex:
+   case nir_intrinsic_load_base_instance:
+   case nir_intrinsic_load_draw_id:
+   case nir_intrinsic_load_front_face:
+   case nir_intrinsic_load_helper_invocation:
+   case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_invocation_id:
+   case nir_intrinsic_load_local_group_size:
+   case nir_intrinsic_load_local_invocation_id:
+   case nir_intrinsic_load_num_work_groups:
+   case nir_intrinsic_load_patch_vertices_in:
+   case nir_intrinsic_load_primitive_id:
+   case nir_intrinsic_load_sample_id:
+   case nir_intrinsic_load_sample_mask_in:
+   case nir_intrinsic_load_sample_pos:
+   case nir_intrinsic_load_subgroup_eq_mask:
+   case nir_intrinsic_load_subgroup_ge_mask:
+   case nir_intrinsic_load_subgroup_gt_mask:
+   case nir_intrinsic_load_subgroup_le_mask:
+   case nir_intrinsic_load_subgroup_lt_mask:
+   case nir_intrinsic_load_subgroup_invocation:
+   case nir_intrinsic_load_tess_coord:
+   case nir_intrinsic_load_tess_level_inner:
+   case nir_intrinsic_load_tess_level_outer:
+   case nir_intrinsic_load_vertex_id:
+   case nir_intrinsic_load_work_group_id: {
+  const DataType dType = getDType(insn);
+  SVSemantic sv = convert(op);
+  LValues &newDefs = convert(&insn->dest);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ Value *def;
+ if (typeSizeof(dType) == 8)
+def = getSSA();
+ else
+def = newDefs[i];
+
+ if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
+loadImm(def, 0u);
+ } else {
+Symbol

[Mesa-dev] [PATCH v6 16/34] nvir/nir: add skeleton for nir_intrinsic_instr

2018-03-19 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp| 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 2a9c0929e90..3ba0285b411 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -105,6 +105,7 @@ private:
bool visit(nir_function *);
bool visit(nir_if *);
bool visit(nir_instr *);
+   bool visit(nir_intrinsic_instr *);
bool visit(nir_jump_instr *);
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
@@ -1265,6 +1266,8 @@ bool
 Converter::visit(nir_instr *insn)
 {
switch (insn->type) {
+   case nir_instr_type_intrinsic:
+  return visit(nir_instr_as_intrinsic(insn));
case nir_instr_type_jump:
   return visit(nir_instr_as_jump(insn));
case nir_instr_type_load_const:
@@ -1276,6 +1279,20 @@ Converter::visit(nir_instr *insn)
return true;
 }
 
+bool
+Converter::visit(nir_intrinsic_instr *insn)
+{
+   nir_intrinsic_op op = insn->intrinsic;
+
+   switch (op) {
+   default:
+  ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
+  return false;
+   }
+
+   return true;
+}
+
 bool
 Converter::visit(nir_jump_instr *insn)
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 20/34] nvir/nir: implement nir_intrinsic_load_(interpolated_)input

2018-03-19 Thread Karol Herbst
v3: and load_output
v4: use smarter getIndirect helper
use new getSlotAddress helper
v5: don't use const_offset directly
fix for indirects
v6: add support for interpolateAt

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 97 ++
 1 file changed, 97 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 546a73f7a74..04a0f03ae2f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1511,6 +1511,103 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_interpolated_input:
+   case nir_intrinsic_load_output: {
+  const DataType dType = getDType(insn);
+  Value *indirect;
+  bool input = op != nir_intrinsic_load_output;
+  operation nvirOp;
+  uint32_t mode;
+
+  LValues &newDefs = convert(&insn->dest);
+  auto idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input 
? 1 : 0, 0, &indirect);
+  nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
+
+  /* see load_barycentric_* handling */
+  if (prog->getType() == Program::TYPE_FRAGMENT) {
+ mode = translateInterpMode(&vary, nvirOp);
+ if (op == nir_intrinsic_load_interpolated_input) {
+ImmediateValue immMode;
+if (getSrc(&insn->src[0], 
1)->getUniqueInsn()->src(0).getImmediate(immMode))
+   mode |= immMode.reg.data.u32;
+ }
+  }
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : 
FILE_SHADER_OUTPUT, 0, dType, address);
+ if (prog->getType() == Program::TYPE_FRAGMENT) {
+int s = 1;
+if (typeSizeof(dType) == 8) {
+   Value *lo = getSSA();
+   Value *hi = getSSA();
+   Instruction *interp;
+
+   interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s, getSrc(&insn->src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+
+   Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : 
FILE_SHADER_OUTPUT, 0, dType, address + 4);
+   interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s++, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+
+   mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
+} else {
+   Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s++, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+}
+ } else {
+mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
+ }
+  }
+  break;
+   }
+   case nir_intrinsic_load_barycentric_at_offset:
+   case nir_intrinsic_load_barycentric_at_sample:
+   case nir_intrinsic_load_barycentric_centroid:
+   case nir_intrinsic_load_barycentric_pixel: {
+  LValues &newDefs = convert(&insn->dest);
+  uint32_t mode;
+
+  if (op == nir_intrinsic_load_barycentric_centroid) {
+ mode = NV50_IR_INTERP_CENTROID;
+  } else if (op == nir_intrinsic_load_barycentric_at_offset) {
+ Value *offs[2];
+ for (auto c = 0u; c < 2; c++) {
+offs[c] = getScratch();
+mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), 
loadImm(NULL, 0.4375f));
+mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
+mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
+mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
+ }
+ mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), 
offs[0]);
+
+ mode = NV50_IR_INTERP_OFFSET;
+  } else if (op == nir_intrinsic_load_barycentric_pixel) {
+ mode = NV50_IR_INTERP_DEFAULT;
+  } else if (op == nir_intrinsic_load_barycentric_at_sample) {
+ mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 
0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
+ mode = NV50_IR_INTERP_OFFSET;
+ 

[Mesa-dev] [PATCH v6 15/34] nvir/nir: implement nir_load_const_instr

2018-03-19 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index a10038f9a88..2a9c0929e90 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -106,6 +106,7 @@ private:
bool visit(nir_if *);
bool visit(nir_instr *);
bool visit(nir_jump_instr *);
+   bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
 
nir_shader *nir;
@@ -1266,6 +1267,8 @@ Converter::visit(nir_instr *insn)
switch (insn->type) {
case nir_instr_type_jump:
   return visit(nir_instr_as_jump(insn));
+   case nir_instr_type_load_const:
+  return visit(nir_instr_as_load_const(insn));
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -1300,6 +1303,21 @@ Converter::visit(nir_jump_instr *insn)
return true;
 }
 
+bool
+Converter::visit(nir_load_const_instr *insn)
+{
+   assert(insn->def.bit_size <= 64);
+
+   LValues &newDefs = convert(&insn->def);
+   for (int i = 0; i < insn->def.num_components; i++) {
+  if (insn->def.bit_size > 32)
+ loadImm(newDefs[i], insn->value.u64[i]);
+  else
+ loadImm(newDefs[i], insn->value.u32[i]);
+   }
+   return true;
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 18/34] nvir/nir: implement nir_intrinsic_load_uniform

2018-03-19 Thread Karol Herbst
v2: use new getIndirect helper
fixes symbols for 64 bit types
v4: use smarter getIndirect helper
simplify address calculation
use loadFrom helper

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 3a81a3ca32e..797b3e7c9d5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1455,6 +1455,16 @@ Converter::visit(nir_intrinsic_instr *insn)
nir_intrinsic_op op = insn->intrinsic;
 
switch (op) {
+   case nir_intrinsic_load_uniform: {
+  LValues &newDefs = convert(&insn->dest);
+  const DataType dType = getDType(insn);
+  Value *indirect;
+  auto coffset = getIndirect(insn, 0, 0, &indirect);
+  for (auto i = 0; i < insn->num_components; ++i) {
+ loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, 
indirect);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 12/34] nvir/nir: add loadFrom and storeTo helper

2018-03-19 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 72 ++
 1 file changed, 72 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 8d547dbbea4..d4432684b27 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -76,6 +76,13 @@ private:
bool centroid,
unsigned semantics);
 
+   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t 
base,
+ uint8_t c, Value *indirect0 = nullptr,
+ Value *indirect1 = nullptr, bool patch = false);
+   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
+Value *src, uint8_t idx, uint8_t c, Value *indirect0 = nullptr,
+Value *indirect1 = nullptr);
+
bool isFloatType(nir_alu_type);
bool isSignedType(nir_alu_type);
bool isResultFloat(nir_op);
@@ -912,6 +919,71 @@ Converter::getSlotAddress(nir_intrinsic_instr *insn, 
uint8_t idx, uint8_t slot)
return vary[idx].slot[slot] * 4;
 }
 
+Instruction *
+Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
+uint32_t base, uint8_t c, Value *indirect0,
+Value *indirect1, bool patch)
+{
+   auto tySize = typeSizeof(ty);
+
+   if (tySize == 8 &&
+   (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) 
{
+  Value *lo = getSSA();
+  Value *hi = getSSA();
+
+  Instruction *loi =
+ mkLoad(TYPE_U32, lo,
+mkSymbol(file, i, TYPE_U32, base + c * tySize),
+indirect0);
+  loi->setIndirect(0, 1, indirect1);
+  loi->perPatch = patch;
+
+  Instruction *hii =
+ mkLoad(TYPE_U32, hi,
+mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
+indirect0);
+  hii->setIndirect(0, 1, indirect1);
+  hii->perPatch = patch;
+
+  return mkOp2(OP_MERGE, ty, def, lo, hi);
+   } else {
+  Instruction *ld =
+ mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
+  ld->setIndirect(0, 1, indirect1);
+  ld->perPatch = patch;
+  return ld;
+   }
+}
+
+void
+Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
+   DataType ty, Value *src, uint8_t idx, uint8_t c,
+   Value *indirect0, Value *indirect1)
+{
+   uint8_t size = typeSizeof(ty);
+   uint32_t address = getSlotAddress(insn, idx, c);
+
+   if (size == 8 && indirect0) {
+  Value *split[2];
+  mkSplit(split, 4, src);
+
+  if (op == OP_EXPORT) {
+ split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
+ split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
+  }
+
+  mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
+  split[0])->perPatch = info->out[idx].patch;
+  mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), 
indirect0,
+  split[1])->perPatch = info->out[idx].patch;
+   } else {
+  if (op == OP_EXPORT)
+ src = mkMov(getSSA(size), src, ty)->getDef(0);
+  mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
+  src)->perPatch = info->out[idx].patch;
+   }
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 14/34] nvir/nir: implement CFG handling

2018-03-19 Thread Karol Herbst
v6: fix loops with blocks at the end nothing points to
skip blocks with no instructions and no predecessors

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 270 -
 1 file changed, 268 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 538c85b6a69..a10038f9a88 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -55,8 +55,10 @@ private:
typedef decltype(nir_ssa_def().index) NirSSADefIdx;
typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize;
typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap;
+   typedef std::unordered_map<decltype(nir_block().index), BasicBlock*> 
NirBlockMap;
 
LValues& convert(nir_alu_dest *);
+   BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
@@ -98,15 +100,46 @@ private:
bool assignSlots();
bool parseNIR();
 
+   bool visit(nir_block *);
+   bool visit(nir_cf_node *);
+   bool visit(nir_function *);
+   bool visit(nir_if *);
+   bool visit(nir_instr *);
+   bool visit(nir_jump_instr *);
+   bool visit(nir_loop *);
+
nir_shader *nir;
 
NirDefMap ssaDefs;
NirDefMap regDefs;
+   NirBlockMap blocks;
+   unsigned int curLoopDepth;
+
+   BasicBlock *exit;
+
+   union {
+  struct {
+ Value *position;
+  } fp;
+   };
 };
 
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
- nir(nir) {}
+ nir(nir),
+ curLoopDepth(0) {}
+
+BasicBlock *
+Converter::convert(nir_block *block)
+{
+   NirBlockMap::iterator it = blocks.find(block->index);
+   if (it != blocks.end())
+  return (*it).second;
+
+   BasicBlock *bb = new BasicBlock(func);
+   blocks[block->index] = bb;
+   return bb;
+}
 
 bool
 Converter::isFloatType(nir_alu_type type)
@@ -1039,6 +1072,234 @@ Converter::parseNIR()
return true;
 }
 
+bool
+Converter::visit(nir_function *function)
+{
+   /* we only support emitting the main function for now */
+   assert(!strcmp(function->name, "main"));
+   assert(function->impl);
+
+   /* usually the blocks will set everything up, but main is special */
+   BasicBlock *entry = new BasicBlock(prog->main);
+   exit = new BasicBlock(prog->main);
+   blocks[nir_start_block(function->impl)->index] = entry;
+   prog->main->setEntry(entry);
+   prog->main->setExit(exit);
+
+   setPosition(entry, true);
+
+   switch (prog->getType()) {
+   case Program::TYPE_TESSELLATION_CONTROL:
+  outBase = mkOp2v(
+ OP_SUB, TYPE_U32, getSSA(),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
+  break;
+   case Program::TYPE_FRAGMENT: {
+  Symbol *sv = mkSysVal(SV_POSITION, 3);
+  fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
+  fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+  break;
+   }
+   default:
+  break;
+   }
+
+   nir_index_ssa_defs(function->impl);
+   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
+  if (!visit(node))
+ return false;
+   }
+
+   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
+   setPosition(exit, true);
+
+   /* TODO: for non main function this needs to be a OP_RETURN */
+   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
+   return true;
+}
+
+bool
+Converter::visit(nir_cf_node *node)
+{
+   switch (node->type) {
+   case nir_cf_node_block:
+  if (!visit(nir_cf_node_as_block(node)))
+ return false;
+  break;
+   case nir_cf_node_if:
+  if (!visit(nir_cf_node_as_if(node)))
+ return false;
+  break;
+   case nir_cf_node_loop:
+  if (!visit(nir_cf_node_as_loop(node)))
+ return false;
+  break;
+   default:
+  ERROR("unknown nir_cf_node type %u\n", node->type);
+  return false;
+   }
+   return true;
+}
+
+bool
+Converter::visit(nir_block *block)
+{
+   if (!block->predecessors->entries && block->instr_list.is_empty())
+  return true;
+
+   BasicBlock *bb = convert(block);
+
+   setPosition(bb, true);
+   nir_foreach_instr(insn, block) {
+  if (!visit(insn))
+ return false;
+   }
+   return true;
+}
+
+bool
+Converter::visit(nir_if *nif)
+{
+   DataType sType = getSType(nif->condition, false, false);
+   Value *src = getSrc(&nif->condition, 0);
+
+   nir_block *lastThen = nir_if_last_then_block(nif);
+   nir_block *lastElse = nir_if_last_else_block(nif);
+
+   assert(!lastThen->successors[1]);
+   assert(!lastElse->successors[1]);
+
+   BasicBlock *ifBB = convert(nir_if_first_then_block(ni

[Mesa-dev] [PATCH v6 21/34] nvir/nir: implement intrinsic_discard(_if)

2018-03-19 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 04a0f03ae2f..2bd40e00db9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1608,6 +1608,20 @@ Converter::visit(nir_intrinsic_instr *insn)
   loadImm(newDefs[1], mode);
   break;
}
+   case nir_intrinsic_discard:
+  mkOp(OP_DISCARD, TYPE_NONE, NULL);
+  break;
+   case nir_intrinsic_discard_if: {
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  if (insn->num_components > 1) {
+ ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
+ assert(false);
+ return false;
+  }
+  mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), 
zero);
+  mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 19/34] nvir/nir: implement nir_intrinsic_store_(per_vertex_)output

2018-03-19 Thread Karol Herbst
v3: add workaround for RA issues
indirects have to be multiplied by 0x10
fix indirect access
v4: use smarter getIndirect helper
use storeTo helper
v5: don't use const_offset directly

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 46 ++
 1 file changed, 46 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 797b3e7c9d5..546a73f7a74 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1258,6 +1258,11 @@ Converter::visit(nir_function *function)
 
setPosition(entry, true);
 
+   if (info->io.genUserClip > 0) {
+  for (int c = 0; c < 4; ++c)
+ clipVtx[c] = getScratch();
+   }
+
switch (prog->getType()) {
case Program::TYPE_TESSELLATION_CONTROL:
   outBase = mkOp2v(
@@ -1284,6 +1289,9 @@ Converter::visit(nir_function *function)
bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
setPosition(exit, true);
 
+   if (info->io.genUserClip > 0)
+  handleUserClipPlanes();
+
/* TODO: for non main function this needs to be a OP_RETURN */
mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
return true;
@@ -1465,6 +1473,44 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_vertex_output: {
+  Value *indirect;
+  DataType dType = getSType(insn->src[0], false, false);
+  auto idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 
0, &indirect);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+continue;
+
+ uint8_t offset = 0;
+ Value *src = getSrc(&insn->src[0], i);
+ switch (prog->getType()) {
+ case Program::TYPE_FRAGMENT: {
+if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
+   /* TGSI uses a different interface than NIR, TGSI stores that
+* value in the z component, NIR in X
+*/
+   offset += 2;
+   src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
+}
+break;
+ }
+ case Program::TYPE_VERTEX: {
+if (info->io.genUserClip > 0) {
+   mkMov(clipVtx[i], src);
+   src = clipVtx[i];
+}
+break;
+ }
+ default:
+break;
+ }
+
+ storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + 
offset, indirect);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 25/34] nvir/nir: add getOperation for intrinsics

2018-03-19 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 24 ++
 1 file changed, 24 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 008beb9a02a..ebf6a5ceb5c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -124,10 +124,12 @@ private:
std::vector<DataType> getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   operation getOperation(nir_intrinsic_op);
operation getOperation(nir_op);
operation getOperation(nir_texop);
operation preOperationNeeded(nir_op);
 
+   int getSubOp(nir_intrinsic_op);
int getSubOp(nir_op);
 
CondCode getCondCode(nir_op);
@@ -404,6 +406,17 @@ Converter::getOperation(nir_texop op)
}
 }
 
+operation
+Converter::getOperation(nir_intrinsic_op op)
+{
+   switch (op) {
+   default:
+  ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
 operation
 Converter::preOperationNeeded(nir_op op)
 {
@@ -426,6 +439,17 @@ Converter::getSubOp(nir_op op)
}
 }
 
+int
+Converter::getSubOp(nir_intrinsic_op op)
+{
+   switch (op) {
+   default:
+  ERROR("couldn't get subop for nir_intrinsic_op %u\n", op);
+  assert(false);
+  return 0;
+   }
+}
+
 CondCode
 Converter::getCondCode(nir_op op)
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6 24/34] nvir/nir: implement nir_instr_type_tex

2018-03-19 Thread Karol Herbst
A lot of those fields are not valid for a lot of tex ops. Not quite sure if
it's worth the effort to check for those or just keep it like that. It seems
to kind of work.

v2: reworked offset handling
add tex support with indirect R/S arguments
handle GLSL_SAMPLER_DIM_EXTERNAL
drop reference in convert(glsl_sampler_dim&, bool, bool)
fix tg4 component selection
v5: fill up coords args with scratch values if coords provided is less than 
TexTarget.getArgCount()

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 229 +
 1 file changed, 229 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 6a43b764601..008beb9a02a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -82,6 +82,7 @@ private:
typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap;
typedef std::unordered_map<decltype(nir_block().index), BasicBlock*> 
NirBlockMap;
 
+   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
LValues& convert(nir_alu_dest *);
BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
@@ -124,6 +125,7 @@ private:
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
operation getOperation(nir_op);
+   operation getOperation(nir_texop);
operation preOperationNeeded(nir_op);
 
int getSubOp(nir_op);
@@ -144,6 +146,10 @@ private:
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
bool visit(nir_ssa_undef_instr *);
+   bool visit(nir_tex_instr *);
+
+   /* tex stuff */
+   Value* applyProjection(Value *src, Value *proj);
 
nir_shader *nir;
 
@@ -368,6 +374,36 @@ Converter::getOperation(nir_op op)
}
 }
 
+operation
+Converter::getOperation(nir_texop op)
+{
+   switch (op) {
+   case nir_texop_tex:
+  return OP_TEX;
+   case nir_texop_lod:
+  return OP_TXLQ;
+   case nir_texop_txb:
+  return OP_TXB;
+   case nir_texop_txd:
+  return OP_TXD;
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+  return OP_TXF;
+   case nir_texop_tg4:
+  return OP_TXG;
+   case nir_texop_txl:
+  return OP_TXL;
+   case nir_texop_query_levels:
+   case nir_texop_texture_samples:
+   case nir_texop_txs:
+  return OP_TXQ;
+   default:
+  ERROR("couldn't get operation for nir_texop %u\n", op);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
 operation
 Converter::preOperationNeeded(nir_op op)
 {
@@ -1454,6 +1490,8 @@ Converter::visit(nir_instr *insn)
   return visit(nir_instr_as_load_const(insn));
case nir_instr_type_ssa_undef:
   return visit(nir_instr_as_ssa_undef(insn));
+   case nir_instr_type_tex:
+  return visit(nir_instr_as_tex(insn));
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -2124,6 +2162,197 @@ Converter::visit(nir_ssa_undef_instr *insn)
return true;
 }
 
+#define CASE_SAMPLER(ty) \
+   case GLSL_SAMPLER_DIM_ ## ty : \
+  if (isArray && !isShadow) \
+ return TEX_TARGET_ ## ty ## _ARRAY; \
+  else if (!isArray && isShadow) \
+ return TEX_TARGET_## ty ## _SHADOW; \
+  else if (isArray && isShadow) \
+ return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
+  else \
+ return TEX_TARGET_ ## ty
+
+TexTarget
+Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
+{
+   switch (dim) {
+   CASE_SAMPLER(1D);
+   CASE_SAMPLER(2D);
+   CASE_SAMPLER(CUBE);
+   case GLSL_SAMPLER_DIM_3D:
+  return TEX_TARGET_3D;
+   case GLSL_SAMPLER_DIM_MS:
+  if (isArray)
+ return TEX_TARGET_2D_MS_ARRAY;
+  return TEX_TARGET_2D_MS;
+   case GLSL_SAMPLER_DIM_RECT:
+  if (isShadow)
+ return TEX_TARGET_RECT_SHADOW;
+  return TEX_TARGET_RECT;
+   case GLSL_SAMPLER_DIM_BUF:
+  return TEX_TARGET_BUFFER;
+   case GLSL_SAMPLER_DIM_EXTERNAL:
+  return TEX_TARGET_2D;
+   default:
+  ERROR("unknown glsl_sampler_dim %u\n", dim);
+  assert(false);
+  return TEX_TARGET_COUNT;
+   }
+}
+#undef CASE_SAMPLER
+
+Value*
+Converter::applyProjection(Value *src, Value *proj)
+{
+   if (!proj)
+  return src;
+   return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
+}
+
+bool
+Converter::visit(nir_tex_instr *insn)
+{
+   switch (insn->op) {
+   case nir_texop_lod:
+   case nir_texop_query_levels:
+   case nir_texop_tex:
+   case nir_texop_texture_samples:
+   case nir_texop_tg4:
+   case nir_texop_txb:
+   case nir_texop_txd:
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+   case nir_texop_txl:
+   case nir_texop_txs: {
+  LValues &newDefs = convert(&insn->dest);
+  std::vector<Value*> srcs;
+  std::vector<Value*> defs;
+  std::vector<nir_src*> offsets;
+  uint8_t mask = 0;
+  

[Mesa-dev] [PATCH v6 17/34] nvir/nir: implement nir_alu_instr handling

2018-03-19 Thread Karol Herbst
Signed-off-by: Karol Herbst <kher...@redhat.com>

v2: use bitfield_insert instead of bfi
rework switch helper macros
remove some lowering code (LoweringHelper is now used for this)
v3: add pack_half_2x16_split
add unpack_half_2x16_split_x/y
v5: replace first argument with nullptr in loadImm calls
prefer getSSA over getScratch
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 489 -
 1 file changed, 488 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 3ba0285b411..3a81a3ca32e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -34,6 +34,31 @@
 #include <unordered_map>
 #include <vector>
 
+#define CASE_OPFI(ni) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni
+#define CASE_OPFIU(ni) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni
+#define CASE_OPIU(ni) \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni
+
+#define CASE_OPFI_RET(ni, val) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+  return val
+#define CASE_OPFIU_RET(ni, val) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni : \
+  return val
+#define CASE_OPIU_RET(ni, val) \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni : \
+  return val
+
 static int
 type_size(const struct glsl_type *type)
 {
@@ -97,9 +122,17 @@ private:
std::vector<DataType> getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   operation getOperation(nir_op);
+   operation preOperationNeeded(nir_op);
+
+   int getSubOp(nir_op);
+
+   CondCode getCondCode(nir_op);
+
bool assignSlots();
bool parseNIR();
 
+   bool visit(nir_alu_instr *);
bool visit(nir_block *);
bool visit(nir_cf_node *);
bool visit(nir_function *);
@@ -118,6 +151,7 @@ private:
unsigned int curLoopDepth;
 
BasicBlock *exit;
+   Value *zero;
 
union {
   struct {
@@ -129,7 +163,10 @@ private:
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir),
- curLoopDepth(0) {}
+ curLoopDepth(0)
+{
+   zero = mkImm((uint32_t)0);
+}
 
 BasicBlock *
 Converter::convert(nir_block *block)
@@ -245,6 +282,137 @@ Converter::getSType(nir_src &src, bool isFloat, bool 
isSigned)
return typeOfSize(bitSize / 8, isFloat, isSigned);
 }
 
+operation
+Converter::getOperation(nir_op op)
+{
+   switch (op) {
+   // basic ops with float and int variants
+   CASE_OPFI_RET(abs, OP_ABS);
+   CASE_OPFI_RET(add, OP_ADD);
+   CASE_OPFI_RET(and, OP_AND);
+   CASE_OPFIU_RET(div, OP_DIV);
+   CASE_OPIU_RET(find_msb, OP_BFIND);
+   CASE_OPFIU_RET(max, OP_MAX);
+   CASE_OPFIU_RET(min, OP_MIN);
+   CASE_OPFIU_RET(mod, OP_MOD);
+   CASE_OPFI_RET(rem, OP_MOD);
+   CASE_OPFI_RET(mul, OP_MUL);
+   CASE_OPIU_RET(mul_high, OP_MUL);
+   CASE_OPFI_RET(neg, OP_NEG);
+   CASE_OPFI_RET(not, OP_NOT);
+   CASE_OPFI_RET(or, OP_OR);
+   CASE_OPFI_RET(eq, OP_SET);
+   CASE_OPFIU_RET(ge, OP_SET);
+   CASE_OPFIU_RET(lt, OP_SET);
+   CASE_OPFI_RET(ne, OP_SET);
+   CASE_OPIU_RET(shr, OP_SHR);
+   CASE_OPFI_RET(sub, OP_SUB);
+   CASE_OPFI_RET(xor, OP_XOR);
+   case nir_op_fceil:
+  return OP_CEIL;
+   case nir_op_fcos:
+  return OP_COS;
+   case nir_op_f2f32:
+   case nir_op_f2f64:
+   case nir_op_f2i32:
+   case nir_op_f2i64:
+   case nir_op_f2u32:
+   case nir_op_f2u64:
+   case nir_op_i2f32:
+   case nir_op_i2f64:
+   case nir_op_i2i32:
+   case nir_op_i2i64:
+   case nir_op_u2f32:
+   case nir_op_u2f64:
+   case nir_op_u2u32:
+   case nir_op_u2u64:
+  return OP_CVT;
+   case nir_op_fddx:
+   case nir_op_fddx_coarse:
+   case nir_op_fddx_fine:
+  return OP_DFDX;
+   case nir_op_fddy:
+   case nir_op_fddy_coarse:
+   case nir_op_fddy_fine:
+  return OP_DFDY;
+   case nir_op_fexp2:
+  return OP_EX2;
+   case nir_op_ffloor:
+  return OP_FLOOR;
+   case nir_op_ffma:
+  return OP_FMA;
+   case nir_op_flog2:
+  return OP_LG2;
+   case nir_op_pack_64_2x32_split:
+  return OP_MERGE;
+   case nir_op_frcp:
+  return OP_RCP;
+   case nir_op_frsq:
+  return OP_RSQ;
+   case nir_op_fsat:
+  return OP_SAT;
+   case nir_op_ishl:
+  return OP_SHL;
+   case nir_op_fsin:
+  return OP_SIN;
+   case nir_op_fsqrt:
+  return OP_SQRT;
+   case nir_op_ftrunc:
+  return OP_TRUNC;
+   default:
+  ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
+operation
+Converter::preOperationNeeded(nir_op op)
+{
+   switch (op) {
+   case nir_op_fcos:
+   case nir_op_fsin:
+  return OP_PRESIN;
+   default:
+  return OP_NOP;
+   }
+}
+
+int
+Converter::getSubOp(nir_op op)
+{
+   switch (op) {
+   CASE_OPIU_RET(mul_hi

[Mesa-dev] [PATCH v6 07/34] nvc0: add env var to make nir default

2018-03-19 Thread Karol Herbst
v2: allow for non debug builds as well
v3: read the env var in a more global place
disable tg4 with multiple offsets with nir
disable caps for 64 bit types
v6: nv50 support
disable MS images
disable bindless textures

Acked-by: Pierre Moreau <pierre.mor...@free.fr>
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 src/gallium/drivers/nouveau/nouveau_screen.c   |  5 +
 src/gallium/drivers/nouveau/nouveau_screen.h   |  2 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |  4 +++-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 19 +--
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c 
b/src/gallium/drivers/nouveau/nouveau_screen.c
index c144b39b2dd..2598c78a45b 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -176,9 +176,14 @@ nouveau_screen_init(struct nouveau_screen *screen, struct 
nouveau_device *dev)
union nouveau_bo_config mm_config;
 
char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
+   char *use_nir = getenv("NV50_PROG_USE_NIR");
+
if (nv_dbg)
   nouveau_mesa_debug = atoi(nv_dbg);
 
+   if (use_nir)
+  screen->prefer_nir = strtol(use_nir, NULL, 0) == 1;
+
/* These must be set before any failure is possible, as the cleanup
 * paths assume they're responsible for deleting them.
 */
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h 
b/src/gallium/drivers/nouveau/nouveau_screen.h
index e4fbae99ca4..1229b66b26f 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -62,6 +62,8 @@ struct nouveau_screen {
 
struct disk_cache *disk_shader_cache;
 
+   bool prefer_nir;
+
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
union {
   uint64_t v[29];
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index ce82c0e80f2..222199a38e4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -310,6 +310,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen,
  enum pipe_shader_type shader,
  enum pipe_shader_cap param)
 {
+   const struct nouveau_screen *screen = nouveau_screen(pscreen);
+
switch (shader) {
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
@@ -363,7 +365,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
   return MIN2(16, PIPE_MAX_SAMPLERS);
case PIPE_SHADER_CAP_PREFERRED_IR:
-  return PIPE_SHADER_IR_TGSI;
+  return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
   return 32;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 37fe173f6b6..3e00a044265 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -90,9 +90,11 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
 
if (bindings & PIPE_BIND_SHADER_IMAGE) {
   if (sample_count > 0 &&
-  nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS) {
+  (nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS ||
+   nouveau_screen(pscreen)->prefer_nir)) {
  /* MS images are currently unsupported on Maxwell because they have to
   * be handled explicitly. */
+ /* MS images are currently unsupported with NIR */
  return false;
   }
 
@@ -112,7 +114,8 @@ static int
 nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 {
const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
-   struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+   const struct nouveau_screen *screen = nouveau_screen(pscreen);
+   struct nouveau_device *dev = screen->device;
 
switch (param) {
/* non-boolean caps */
@@ -216,7 +219,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
-   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
@@ -256,6 +258,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
   return 1;
+   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+  /* TODO: nir doesn't support tg4 with multiple offsets */
+  return screen->prefer_nir ? 0 : 1;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
   return nouveau_screen(ps

[Mesa-dev] [PATCH v6 13/34] nvir/nir: parse NIR shader info

2018-03-19 Thread Karol Herbst
v2: parse a few more fields
v3: add special handling for GL_ISOLINES

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 60 ++
 1 file changed, 60 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index d4432684b27..538c85b6a69 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -96,6 +96,7 @@ private:
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
bool assignSlots();
+   bool parseNIR();
 
nir_shader *nir;
 
@@ -984,6 +985,60 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile 
file, operation op,
}
 }
 
+bool
+Converter::parseNIR()
+{
+   info->io.clipDistances = nir->info.clip_distance_array_size;
+   info->io.cullDistances = nir->info.cull_distance_array_size;
+
+   switch(prog->getType()) {
+   case Program::TYPE_COMPUTE:
+  info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
+  info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
+  info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
+  info->bin.smemSize = nir->info.cs.shared_size;
+  break;
+   case Program::TYPE_FRAGMENT:
+  info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
+  info->prop.fp.persampleInvocation =
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
+  info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
+  info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
+  info->prop.fp.usesSampleMaskIn =
+ !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
+  break;
+   case Program::TYPE_GEOMETRY:
+  info->prop.gp.inputPrim = nir->info.gs.input_primitive;
+  info->prop.gp.instanceCount = nir->info.gs.invocations;
+  info->prop.gp.maxVertices = nir->info.gs.vertices_out;
+  info->prop.gp.outputPrim = nir->info.gs.output_primitive;
+  break;
+   case Program::TYPE_TESSELLATION_CONTROL:
+   case Program::TYPE_TESSELLATION_EVAL:
+  if (nir->info.tess.primitive_mode == GL_ISOLINES)
+ info->prop.tp.domain = GL_LINES;
+  else
+ info->prop.tp.domain = nir->info.tess.primitive_mode;
+  info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
+  info->prop.tp.outputPrim =
+ nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
+  info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
+  info->prop.tp.winding = !nir->info.tess.ccw;
+  break;
+   case Program::TYPE_VERTEX:
+  info->prop.vp.usesDrawParameters =
+ (nir->info.system_values_read & 
BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
+ (nir->info.system_values_read & 
BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
+ (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
+  break;
+   default:
+  break;
+   }
+
+   return true;
+}
+
 bool
 Converter::run()
 {
@@ -1020,6 +1075,11 @@ Converter::run()
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
   nir_print_shader(nir, stderr);
 
+   if (!parseNIR()) {
+  ERROR("Couldn't parse NIR!\n");
+  return false;
+   }
+
if (!assignSlots()) {
   ERROR("Couldn't assign slots!\n");
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


<    1   2   3   4   5   6   7   8   9   10   >