[Mesa-dev] [PATCH v7 09/35] nvir/nir: run some passes to make the conversion easier
v2: add constant_folding v6: print non final NIR only for verbose debugging Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 39 ++ 1 file changed, 39 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index b22c62fd434..0b7a5981f73 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -31,6 +31,12 @@ #include "codegen/nv50_ir_lowering_helper.h" #include "codegen/nv50_ir_util.h" +static int +type_size(const struct glsl_type *type) +{ + return glsl_count_attribute_slots(type, false); +} + namespace { using namespace nv50_ir; @@ -52,6 +58,39 @@ Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) bool Converter::run() { + bool progress; + + if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) + nir_print_shader(nir, stderr); + + NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0); + NIR_PASS_V(nir, nir_lower_regs_to_ssa); + NIR_PASS_V(nir, nir_lower_load_const_to_scalar); + NIR_PASS_V(nir, nir_lower_vars_to_ssa); + NIR_PASS_V(nir, nir_lower_alu_to_scalar); + NIR_PASS_V(nir, nir_lower_phis_to_scalar); + + do { + progress = false; + /* some ops depend on having constants as sources, but those can also + * point to expressions made from constants like 0 + 1 + */ + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_dead_cf); + } while (progress); + + NIR_PASS_V(nir, nir_lower_locals_to_regs); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_local); + NIR_PASS_V(nir, nir_convert_from_ssa, true); + + /* Garbage collect dead instructions */ + nir_sweep(nir); + + if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) + nir_print_shader(nir, stderr); + return false; } -- 2.14.3 ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v7 21/35] nvir/nir: implement load_(interpolated_)input/output
v3: and load_output v4: use smarter getIndirect helper use new getSlotAddress helper v5: don't use const_offset directly fix for indirects v6: add support for interpolateAt v7: fix compiler warnings add load_barycentric_sample handle load_output for fragment shaders Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 134 + 1 file changed, 134 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index b34fe7739d8..740dee5c95a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1523,6 +1523,140 @@ Converter::visit(nir_intrinsic_instr *insn) } break; } + case nir_intrinsic_load_input: + case nir_intrinsic_load_interpolated_input: + case nir_intrinsic_load_output: { + LValues = convert(>dest); + + /* FBFetch */ + if (prog->getType() == Program::TYPE_FRAGMENT && + op == nir_intrinsic_load_output) { + std::vector<Value*> defs, srcs; + uint8_t mask = 0; + + srcs.push_back(getSSA()); + srcs.push_back(getSSA()); + Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0)); + Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1)); + mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z; + mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z; + + srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0))); + srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0))); + + for (auto i = 0u; i < insn->num_components; ++i) { +defs.push_back(newDefs[i]); +mask |= 1 << i; + } + + TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs); + texi->tex.levelZero = 1; + texi->tex.mask = mask; + texi->tex.useOffsets = 0; + texi->tex.r = 0x; + texi->tex.s = 0x; + + info->prop.fp.readsFramebuffer = true; + break; + } + + const DataType dType = getDType(insn); + 
Value *indirect; + bool input = op != nir_intrinsic_load_output; + operation nvirOp; + uint32_t mode = 0; + + auto idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect); + nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx]; + + /* see load_barycentric_* handling */ + if (prog->getType() == Program::TYPE_FRAGMENT) { + mode = translateInterpMode(, nvirOp); + if (op == nir_intrinsic_load_interpolated_input) { +ImmediateValue immMode; +if (getSrc(>src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode)) + mode |= immMode.reg.data.u32; + } + } + + for (auto i = 0u; i < insn->num_components; ++i) { + uint32_t address = getSlotAddress(insn, idx, i); + Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address); + if (prog->getType() == Program::TYPE_FRAGMENT) { +int s = 1; +if (typeSizeof(dType) == 8) { + Value *lo = getSSA(); + Value *hi = getSSA(); + Instruction *interp; + + interp = mkOp1(nvirOp, TYPE_U32, lo, sym); + if (nvirOp == OP_PINTERP) + interp->setSrc(s, fp.position); + if (mode & NV50_IR_INTERP_OFFSET) + interp->setSrc(s, getSrc(>src[0], 0)); + interp->setInterpolate(mode); + interp->setIndirect(0, 0, indirect); + + Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4); + interp = mkOp1(nvirOp, TYPE_U32, hi, sym1); + if (nvirOp == OP_PINTERP) + interp->setSrc(s++, fp.position); + if (mode & NV50_IR_INTERP_OFFSET) + interp->setSrc(s++, getSrc(>src[0], 0)); + interp->setInterpolate(mode); + interp->setIndirect(0, 0, indirect); + + mkOp2(OP_MERGE, dType, newDefs[i], lo, hi); +} else { + Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym); + if (nvirOp == OP_PINTERP) + interp->setSrc(s++, fp.position); + if (mode & NV50_IR_INTERP_OFFSET) + interp->setSrc(s++, getSrc(>src[0], 0)); + interp->setInterpolate(mode); + interp->setIndirect(0, 0, indirect); +} + } else { +mkLoad(dType, n
[Mesa-dev] [PATCH v7 14/35] nvir/nir: parse NIR shader info
v2: parse a few more fields v3: add special handling for GL_ISOLINES Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 60 ++ 1 file changed, 60 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 00ca1ae1512..4bb99c6635c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -96,6 +96,7 @@ private: DataType getSType(nir_src&, bool isFloat, bool isSigned); bool assignSlots(); + bool parseNIR(); nir_shader *nir; @@ -996,6 +997,60 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op, } } +bool +Converter::parseNIR() +{ + info->io.clipDistances = nir->info.clip_distance_array_size; + info->io.cullDistances = nir->info.cull_distance_array_size; + + switch(prog->getType()) { + case Program::TYPE_COMPUTE: + info->prop.cp.numThreads[0] = nir->info.cs.local_size[0]; + info->prop.cp.numThreads[1] = nir->info.cs.local_size[1]; + info->prop.cp.numThreads[2] = nir->info.cs.local_size[2]; + info->bin.smemSize = nir->info.cs.shared_size; + break; + case Program::TYPE_FRAGMENT: + info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests; + info->prop.fp.persampleInvocation = + (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) || + (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS); + info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage; + info->prop.fp.usesDiscard = nir->info.fs.uses_discard; + info->prop.fp.usesSampleMaskIn = + !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN); + break; + case Program::TYPE_GEOMETRY: + info->prop.gp.inputPrim = nir->info.gs.input_primitive; + info->prop.gp.instanceCount = nir->info.gs.invocations; + info->prop.gp.maxVertices = nir->info.gs.vertices_out; + info->prop.gp.outputPrim = nir->info.gs.output_primitive; + break; + case 
Program::TYPE_TESSELLATION_CONTROL: + case Program::TYPE_TESSELLATION_EVAL: + if (nir->info.tess.primitive_mode == GL_ISOLINES) + info->prop.tp.domain = GL_LINES; + else + info->prop.tp.domain = nir->info.tess.primitive_mode; + info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out; + info->prop.tp.outputPrim = + nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES; + info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3; + info->prop.tp.winding = !nir->info.tess.ccw; + break; + case Program::TYPE_VERTEX: + info->prop.vp.usesDrawParameters = + (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) || + (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) || + (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)); + break; + default: + break; + } + + return true; +} + bool Converter::run() { @@ -1029,6 +1084,11 @@ Converter::run() /* Garbage collect dead instructions */ nir_sweep(nir); + if (!parseNIR()) { + ERROR("Couldn't prase NIR!\n"); + return false; + } + if (!assignSlots()) { ERROR("Couldn't assign slots!\n"); return false; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v7 12/35] nvir/nir: run assignSlots
v2: add support for geometry shaders set idx add some missing mappings fix for 64bit inputs/outputs fix up some FP color output index messup parse centroid flag v3: fix arrays in outputs as well fix input/ouput size calculation for tessellation shaders v4: add getSlotAddress helper fix for 64 bit typed inputs v5: change getSlotAddress interface for easier use fix sample inputs fix slot counting for mat v7: fix driver_location of images Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 626 + 1 file changed, 626 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 89c55a08ef8..1d1c4526d2b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -69,6 +69,13 @@ private: uint32_t getIndirect(nir_src *, uint8_t, Value*&); uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&); + uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot); + + void setInterpolate(nv50_ir_varying *, + decltype(nir_variable().data.interpolation), + bool centroid, + unsigned semantics); + bool isFloatType(nir_alu_type); bool isSignedType(nir_alu_type); bool isResultFloat(nir_op); @@ -81,6 +88,8 @@ private: std::vector getSTypes(nir_alu_instr*); DataType getSType(nir_src&, bool isFloat, bool isSigned); + bool assignSlots(); + nir_shader *nir; NirDefMap ssaDefs; @@ -303,6 +312,618 @@ Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *& return idx; } +static void +vert_attrib_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index) +{ + if (slot >= VERT_ATTRIB_GENERIC0) { + *name = TGSI_SEMANTIC_GENERIC; + *index = slot - VERT_ATTRIB_GENERIC0; + return; + } + + if (slot == VERT_ATTRIB_POINT_SIZE) { + ERROR("unknown vert attrib slot %u\n", slot); + assert(false); + return; + } + + if (slot >= 
VERT_ATTRIB_TEX0) { + *name = TGSI_SEMANTIC_TEXCOORD; + *index = slot - VERT_ATTRIB_TEX0; + return; + } + + switch (slot) { + case VERT_ATTRIB_COLOR0: + *name = TGSI_SEMANTIC_COLOR; + *index = 0; + break; + case VERT_ATTRIB_COLOR1: + *name = TGSI_SEMANTIC_COLOR; + *index = 1; + break; + case VERT_ATTRIB_EDGEFLAG: + *name = TGSI_SEMANTIC_EDGEFLAG; + *index = 0; + break; + case VERT_ATTRIB_FOG: + *name = TGSI_SEMANTIC_FOG; + *index = 0; + break; + case VERT_ATTRIB_NORMAL: + *name = TGSI_SEMANTIC_NORMAL; + *index = 0; + break; + case VERT_ATTRIB_POS: + *name = TGSI_SEMANTIC_POSITION; + *index = 0; + break; + default: + ERROR("unknown vert attrib slot %u\n", slot); + assert(false); + break; + } +} + +static void +varying_slot_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index) +{ + if (slot >= VARYING_SLOT_PATCH0) { + *name = TGSI_SEMANTIC_PATCH; + *index = slot - VARYING_SLOT_PATCH0; + return; + } + + if (slot >= VARYING_SLOT_VAR0) { + *name = TGSI_SEMANTIC_GENERIC; + *index = slot - VARYING_SLOT_VAR0; + return; + } + + if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) { + *name = TGSI_SEMANTIC_TEXCOORD; + *index = slot - VARYING_SLOT_TEX0; + return; + } + + switch (slot) { + case VARYING_SLOT_BFC0: + *name = TGSI_SEMANTIC_BCOLOR; + *index = 0; + break; + case VARYING_SLOT_BFC1: + *name = TGSI_SEMANTIC_BCOLOR; + *index = 1; + break; + case VARYING_SLOT_CLIP_DIST0: + *name = TGSI_SEMANTIC_CLIPDIST; + *index = 0; + break; + case VARYING_SLOT_CLIP_DIST1: + *name = TGSI_SEMANTIC_CLIPDIST; + *index = 1; + break; + case VARYING_SLOT_CLIP_VERTEX: + *name = TGSI_SEMANTIC_CLIPVERTEX; + *index = 0; + break; + case VARYING_SLOT_COL0: + *name = TGSI_SEMANTIC_COLOR; + *index = 0; + break; + case VARYING_SLOT_COL1: + *name = TGSI_SEMANTIC_COLOR; + *index = 1; + break; + case VARYING_SLOT_EDGE: + *name = TGSI_SEMANTIC_EDGEFLAG; + *index = 0; + break; + case VARYING_SLOT_FACE: + *name = TGSI_SEMANTIC_FACE; + *index = 0; + break; + case VARYING_SLOT_FOGC: + 
*name = TGSI_SEMANTIC_FOG; + *index = 0; + break; + case VARYING_SLOT_LAYER: + *name = TGSI_SEMANTIC_LAYER; + *index = 0; + break; + case VARYING_SLOT_PNTC: + *name = TGSI_SEMANTIC_PCOORD; + *index = 0; + break; + case VARYING_SLOT_POS: + *name = TGSI_SEMANTIC_POSITION; + *index = 0; + break; +
[Mesa-dev] [PATCH v7 15/35] nvir/nir: implement CFG handling
v6: fix loops with blocks at the end nothing points to skip blocks with no instructions and no predecessors Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 270 - 1 file changed, 268 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 4bb99c6635c..c2512b01d5a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -55,8 +55,10 @@ private: typedef decltype(nir_ssa_def().index) NirSSADefIdx; typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize; typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap; + typedef std::unordered_map<decltype(nir_block().index), BasicBlock*> NirBlockMap; LValues& convert(nir_alu_dest *); + BasicBlock* convert(nir_block *); LValues& convert(nir_dest *); LValues& convert(nir_register *); LValues& convert(nir_ssa_def *); @@ -98,15 +100,46 @@ private: bool assignSlots(); bool parseNIR(); + bool visit(nir_block *); + bool visit(nir_cf_node *); + bool visit(nir_function *); + bool visit(nir_if *); + bool visit(nir_instr *); + bool visit(nir_jump_instr *); + bool visit(nir_loop *); + nir_shader *nir; NirDefMap ssaDefs; NirDefMap regDefs; + NirBlockMap blocks; + unsigned int curLoopDepth; + + BasicBlock *exit; + + union { + struct { + Value *position; + } fp; + }; }; Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) : ConverterCommon(prog, info), - nir(nir) {} + nir(nir), + curLoopDepth(0) {} + +BasicBlock * +Converter::convert(nir_block *block) +{ + NirBlockMap::iterator it = blocks.find(block->index); + if (it != blocks.end()) + return (*it).second; + + BasicBlock *bb = new BasicBlock(func); + blocks[block->index] = bb; + return bb; +} bool Converter::isFloatType(nir_alu_type type) @@ -1051,6 +1084,234 @@ Converter::parseNIR() return true; } +bool 
+Converter::visit(nir_function *function) +{ + /* we only support emiting the main function for now */ + assert(!strcmp(function->name, "main")); + assert(function->impl); + + /* usually the blocks will set everything up, but main is special */ + BasicBlock *entry = new BasicBlock(prog->main); + exit = new BasicBlock(prog->main); + blocks[nir_start_block(function->impl)->index] = entry; + prog->main->setEntry(entry); + prog->main->setExit(exit); + + setPosition(entry, true); + + switch (prog->getType()) { + case Program::TYPE_TESSELLATION_CONTROL: + outBase = mkOp2v( + OP_SUB, TYPE_U32, getSSA(), + mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)), + mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0))); + break; + case Program::TYPE_FRAGMENT: { + Symbol *sv = mkSysVal(SV_POSITION, 3); + fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv); + fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]); + break; + } + default: + break; + } + + nir_index_ssa_defs(function->impl); + foreach_list_typed(nir_cf_node, node, node, >impl->body) { + if (!visit(node)) + return false; + } + + bb->cfg.attach(>cfg, Graph::Edge::TREE); + setPosition(exit, true); + + /* TODO: for non main function this needs to be a OP_RETURN */ + mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; + return true; +} + +bool +Converter::visit(nir_cf_node *node) +{ + switch (node->type) { + case nir_cf_node_block: + if (!visit(nir_cf_node_as_block(node))) + return false; + break; + case nir_cf_node_if: + if (!visit(nir_cf_node_as_if(node))) + return false; + break; + case nir_cf_node_loop: + if (!visit(nir_cf_node_as_loop(node))) + return false; + break; + default: + ERROR("unknown nir_cf_node type %u\n", node->type); + return false; + } + return true; +} + +bool +Converter::visit(nir_block *block) +{ + if (!block->predecessors->entries && block->instr_list.is_empty()) + return true; + + BasicBlock *bb = convert(block); + + setPosition(bb, true); + 
nir_foreach_instr(insn, block) { + if (!visit(insn)) + return false; + } + return true; +} + +bool +Converter::visit(nir_if *nif) +{ + DataType sType = getSType(nif->condition, false, false); + Value *src = getSrc(>condition, 0); + + nir_block *lastThen = nir_if_last_then_block(nif); + nir_block *lastElse = nir_if_last_else_block(nif); + + assert(!lastThen->successors[1]); + assert(!lastElse->successors[1]); + + BasicBlock *ifBB = convert(nir_if_first_then_block(ni
[Mesa-dev] [PATCH v7 16/35] nvir/nir: implement nir_load_const_instr
Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 18 ++ 1 file changed, 18 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index c2512b01d5a..f4f844021a2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -106,6 +106,7 @@ private: bool visit(nir_if *); bool visit(nir_instr *); bool visit(nir_jump_instr *); + bool visit(nir_load_const_instr*); bool visit(nir_loop *); nir_shader *nir; @@ -1278,6 +1279,8 @@ Converter::visit(nir_instr *insn) switch (insn->type) { case nir_instr_type_jump: return visit(nir_instr_as_jump(insn)); + case nir_instr_type_load_const: + return visit(nir_instr_as_load_const(insn)); default: ERROR("unknown nir_instr type %u\n", insn->type); return false; @@ -1312,6 +1315,21 @@ Converter::visit(nir_jump_instr *insn) return true; } +bool +Converter::visit(nir_load_const_instr *insn) +{ + assert(insn->def.bit_size <= 64); + + LValues &newDefs = convert(&insn->def); + for (int i = 0; i < insn->def.num_components; i++) { + if (insn->def.bit_size > 32) + loadImm(newDefs[i], insn->value.u64[i]); + else + loadImm(newDefs[i], insn->value.u32[i]); + } + return true; +} + bool Converter::run() { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v7 13/35] nvir/nir: add loadFrom and storeTo helpers
Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 72 ++ 1 file changed, 72 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 1d1c4526d2b..00ca1ae1512 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -76,6 +76,13 @@ private: bool centroid, unsigned semantics); + Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base, + uint8_t c, Value *indirect0 = nullptr, + Value *indirect1 = nullptr, bool patch = false); + void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType, +Value *src, uint8_t idx, uint8_t c, Value *indirect0 = nullptr, +Value *indirect1 = nullptr); + bool isFloatType(nir_alu_type); bool isSignedType(nir_alu_type); bool isResultFloat(nir_op); @@ -924,6 +931,71 @@ Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot) return vary[idx].slot[slot] * 4; } +Instruction * +Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def, +uint32_t base, uint8_t c, Value *indirect0, +Value *indirect1, bool patch) +{ + auto tySize = typeSizeof(ty); + + if (tySize == 8 && + (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) { + Value *lo = getSSA(); + Value *hi = getSSA(); + + Instruction *loi = + mkLoad(TYPE_U32, lo, +mkSymbol(file, i, TYPE_U32, base + c * tySize), +indirect0); + loi->setIndirect(0, 1, indirect1); + loi->perPatch = patch; + + Instruction *hii = + mkLoad(TYPE_U32, hi, +mkSymbol(file, i, TYPE_U32, base + c * tySize + 4), +indirect0); + hii->setIndirect(0, 1, indirect1); + hii->perPatch = patch; + + return mkOp2(OP_MERGE, ty, def, lo, hi); + } else { + Instruction *ld = + mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0); + ld->setIndirect(0, 1, indirect1); + ld->perPatch = patch; + return ld; + } +} + +void 
+Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op, + DataType ty, Value *src, uint8_t idx, uint8_t c, + Value *indirect0, Value *indirect1) +{ + uint8_t size = typeSizeof(ty); + uint32_t address = getSlotAddress(insn, idx, c); + + if (size == 8 && indirect0) { + Value *split[2]; + mkSplit(split, 4, src); + + if (op == OP_EXPORT) { + split[0] = mkMov(getSSA(), split[0], ty)->getDef(0); + split[1] = mkMov(getSSA(), split[1], ty)->getDef(0); + } + + mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0, + split[0])->perPatch = info->out[idx].patch; + mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0, + split[1])->perPatch = info->out[idx].patch; + } else { + if (op == OP_EXPORT) + src = mkMov(getSSA(size), src, ty)->getDef(0); + mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0, + src)->perPatch = info->out[idx].patch; + } +} + bool Converter::run() { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v7 10/35] nvir/nir: track defs and provide easy access functions
v2: add helper function for indirects v4: add new getIndirect overload for easier use v5: use getSSA for ssa values we can just create the values for unassigned registers in getSrc v6: always create at least 32 bit values Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 132 + 1 file changed, 132 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 0b7a5981f73..b61c6e90b1a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -31,6 +31,9 @@ #include "codegen/nv50_ir_lowering_helper.h" #include "codegen/nv50_ir_util.h" +#include +#include + static int type_size(const struct glsl_type *type) { @@ -48,13 +51,142 @@ public: bool run(); private: + typedef std::vector<LValue*> LValues; + typedef decltype(nir_ssa_def().index) NirSSADefIdx; + typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap; + + LValues& convert(nir_alu_dest *); + LValues& convert(nir_dest *); + LValues& convert(nir_register *); + LValues& convert(nir_ssa_def *); + + Value* getSrc(nir_alu_src *, uint8_t component = 0); + Value* getSrc(nir_register *, uint8_t); + Value* getSrc(nir_src *, uint8_t, bool indirect = false); + Value* getSrc(nir_ssa_def *, uint8_t); + + uint32_t getIndirect(nir_src *, uint8_t, Value*&); + uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&); + nir_shader *nir; + + NirDefMap ssaDefs; + NirDefMap regDefs; }; Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) : ConverterCommon(prog, info), nir(nir) {} +Converter::LValues& +Converter::convert(nir_dest *dest) +{ + if (dest->is_ssa) + return convert(>ssa); + if (dest->reg.indirect) { + ERROR("no support for indirects."); + assert(false); + } + return convert(dest->reg.reg); +} + +Converter::LValues& +Converter::convert(nir_register *reg) +{ + 
NirDefMap::iterator it = regDefs.find(reg->index); + if (it != regDefs.end()) + return (*it).second; + + LValues newDef(reg->num_components); + for (auto i = 0u; i < reg->num_components; i++) + newDef[i] = getScratch(std::max(4, reg->bit_size / 8)); + return regDefs[reg->index] = newDef; +} + +Converter::LValues& +Converter::convert(nir_ssa_def *def) +{ + NirDefMap::iterator it = ssaDefs.find(def->index); + if (it != ssaDefs.end()) + return (*it).second; + + LValues newDef(def->num_components); + for (auto i = 0; i < def->num_components; i++) + newDef[i] = getSSA(std::max(4, def->bit_size / 8)); + return ssaDefs[def->index] = newDef; +} + +Value* +Converter::getSrc(nir_alu_src *src, uint8_t component) +{ + if (src->abs || src->negate) { + ERROR("modifiers currently not supported on nir_alu_src\n"); + assert(false); + } + return getSrc(>src, src->swizzle[component]); +} + +Value* +Converter::getSrc(nir_register *reg, uint8_t idx) +{ + NirDefMap::iterator it = regDefs.find(reg->index); + if (it == regDefs.end()) + return convert(reg)[idx]; + return (*it).second[idx]; +} + +Value* +Converter::getSrc(nir_src *src, uint8_t idx, bool indirect) +{ + if (src->is_ssa) + return getSrc(src->ssa, idx); + + if (src->reg.indirect) { + if (indirect) + return getSrc(src->reg.indirect, idx); + ERROR("no support for indirects."); + assert(false); + return nullptr; + } + + return getSrc(src->reg.reg, idx); +} + +Value* +Converter::getSrc(nir_ssa_def *src, uint8_t idx) +{ + NirDefMap::iterator it = ssaDefs.find(src->index); + if (it == ssaDefs.end()) { + ERROR("SSA value %u not found\n", src->index); + assert(false); + return nullptr; + } + return (*it).second[idx]; +} + +uint32_t +Converter::getIndirect(nir_src *src, uint8_t idx, Value *) +{ + nir_const_value *offset = nir_src_as_const_value(*src); + + if (offset) { + indirect = nullptr; + return offset->u32[0]; + } + + indirect = getSrc(src, idx, true); + return 0; +} + +uint32_t +Converter::getIndirect(nir_intrinsic_instr *insn, 
uint8_t s, uint8_t c, Value *&indirect) +{ + auto idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect); + if (indirect) + indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(nullptr, 4)); + return idx; +} + bool Converter::run() { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v7 08/35] nouveau: fix nir and TGSI shader cache collision
Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/gallium/drivers/nouveau/nouveau_screen.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 2598c78a45b..655d2d090f6 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -156,9 +156,13 @@ nouveau_disk_cache_create(struct nouveau_screen *screen) &mesa_timestamp)) { res = asprintf(&timestamp_str, "%u", mesa_timestamp); if (res != -1) { + uint64_t shader_debug_flags = 0; + if (screen->prefer_nir) +shader_debug_flags |= 1 << 0; + screen->disk_shader_cache = disk_cache_create(nouveau_screen_get_name(&screen->base), - timestamp_str, 0); + timestamp_str, shader_debug_flags); free(timestamp_str); } } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v7 05/35] nvir: add lowering helper
this is mostly useful for lazy IR converters not wanting to deal with 64 bit lowering and other illegal stuff v5: also handle SAT v6: rename type variables fixed lowering of NEG add lowering of NOT Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/gallium/drivers/nouveau/Makefile.sources | 2 + .../nouveau/codegen/nv50_ir_lowering_helper.cpp| 275 + .../nouveau/codegen/nv50_ir_lowering_helper.h | 53 src/gallium/drivers/nouveau/meson.build| 2 + 4 files changed, 332 insertions(+) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index fee5e59522e..ec344c63169 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -122,6 +122,8 @@ NV50_CODEGEN_SOURCES := \ codegen/nv50_ir_graph.h \ codegen/nv50_ir.h \ codegen/nv50_ir_inlines.h \ + codegen/nv50_ir_lowering_helper.cpp \ + codegen/nv50_ir_lowering_helper.h \ codegen/nv50_ir_lowering_nv50.cpp \ codegen/nv50_ir_peephole.cpp \ codegen/nv50_ir_print.cpp \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp new file mode 100644 index 000..9373531b0b1 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp @@ -0,0 +1,275 @@ +/* + * Copyright 2018 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Karol Herbst <kher...@redhat.com> + */ + +#include "codegen/nv50_ir_lowering_helper.h" + +namespace nv50_ir { + +bool +LoweringHelper::visit(Instruction *insn) +{ + switch (insn->op) { + case OP_ABS: + return handleABS(insn); + case OP_CVT: + return handleCVT(insn); + case OP_MAX: + case OP_MIN: + return handleMAXMIN(insn); + case OP_MOV: + return handleMOV(insn); + case OP_NEG: + return handleNEG(insn); + case OP_SAT: + return handleSAT(insn); + case OP_SLCT: + return handleSLCT(insn->asCmp()); + case OP_AND: + case OP_NOT: + case OP_OR: + case OP_XOR: + return handleLogOp(insn); + default: + return true; + } +} + +bool +LoweringHelper::handleABS(Instruction *insn) +{ + DataType dTy = insn->dType; + if (!(dTy == TYPE_U64 || dTy == TYPE_S64)) + return true; + + bld.setPosition(insn, false); + + Value *neg = bld.getSSA(8); + Value *negComp[2], *srcComp[2]; + Value *lo = bld.getSSA(), *hi = bld.getSSA(); + bld.mkOp2(OP_SUB, dTy, neg, bld.mkImm((uint64_t)0), insn->getSrc(0)); + bld.mkSplit(negComp, 4, neg); + bld.mkSplit(srcComp, 4, insn->getSrc(0)); + bld.mkCmp(OP_SLCT, CC_LT, TYPE_S32, lo, TYPE_S32, negComp[0], srcComp[0], srcComp[1]); + bld.mkCmp(OP_SLCT, CC_LT, TYPE_S32, hi, TYPE_S32, negComp[1], srcComp[1], srcComp[1]); + insn->op = OP_MERGE; + insn->setSrc(0, lo); + insn->setSrc(1, hi); + + return true; +} + +bool +LoweringHelper::handleCVT(Instruction *insn) +{ + DataType dTy = insn->dType; + DataType sTy = insn->sType; + + if (typeSizeof(dTy) <= 4 && typeSizeof(sTy) <= 4) + return true; + + bld.setPosition(insn, false); + + if ((dTy == TYPE_S32 && sTy == TYPE_S64) || + (dTy == TYPE_U32 && sTy == TYPE_U64)) { + Value *src[2]; + bld.mkSplit(src, 4, insn->getSrc(0)); + insn->op = OP_MOV; + insn->setSrc(0, src[0]); + } else if (dTy == TYPE_S64 && sTy == TYPE_S32) { + Value *tmp = bld.getSSA(); + bld.mkOp2(OP_SHR, TYPE_S32, tmp, insn->getSrc(0), bld.loadImm(bld.getSSA(), 31)); + insn-&g
[Mesa-dev] [PATCH v7 11/35] nvir/nir: add nir type helper functions
v4: treat imul as unsigned v5: remove pointless !! v7: inot is unsigned as well Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 116 + 1 file changed, 116 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index b61c6e90b1a..89c55a08ef8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -53,6 +53,7 @@ public: private: typedef std::vector<LValue*> LValues; typedef decltype(nir_ssa_def().index) NirSSADefIdx; + typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize; typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap; LValues& convert(nir_alu_dest *); @@ -68,6 +69,18 @@ private: uint32_t getIndirect(nir_src *, uint8_t, Value*&); uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&); + bool isFloatType(nir_alu_type); + bool isSignedType(nir_alu_type); + bool isResultFloat(nir_op); + bool isResultSigned(nir_op); + + DataType getDType(nir_alu_instr*); + DataType getDType(nir_intrinsic_instr*); + DataType getDType(nir_op, NirSSADefBitSize); + + std::vector<DataType> getSTypes(nir_alu_instr*); + DataType getSType(nir_src&, bool isFloat, bool isSigned); + nir_shader *nir; NirDefMap ssaDefs; @@ -78,6 +91,109 @@ Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) : ConverterCommon(prog, info), nir(nir) {} +bool +Converter::isFloatType(nir_alu_type type) +{ + return nir_alu_type_get_base_type(type) == nir_type_float; +} + +bool +Converter::isSignedType(nir_alu_type type) +{ + return nir_alu_type_get_base_type(type) == nir_type_int; +} + +bool +Converter::isResultFloat(nir_op op) +{ + const nir_op_info &info = nir_op_infos[op]; + if (info.output_type != nir_type_invalid) + return isFloatType(info.output_type); + + ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name); + assert(false); + 
return true; +} + +bool +Converter::isResultSigned(nir_op op) +{ + switch (op) { + /* there is no umul and we get wrong results if we treat all muls as signed */ + case nir_op_imul: + case nir_op_inot: + return false; + default: + const nir_op_info &info = nir_op_infos[op]; + if (info.output_type != nir_type_invalid) + return isSignedType(info.output_type); + ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name); + assert(false); + return true; + } +} + +DataType +Converter::getDType(nir_alu_instr *insn) +{ + if (insn->dest.dest.is_ssa) + return getDType(insn->op, insn->dest.dest.ssa.bit_size); + else + return getDType(insn->op, insn->dest.dest.reg.reg->bit_size); +} + +DataType +Converter::getDType(nir_intrinsic_instr *insn) +{ + if (insn->dest.is_ssa) + return typeOfSize(insn->dest.ssa.bit_size / 8, false, false); + else + return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false); +} + +DataType +Converter::getDType(nir_op op, Converter::NirSSADefBitSize bitSize) +{ + DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op)); + if (ty == TYPE_NONE) { + ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize); + assert(false); + } + return ty; +} + +std::vector<DataType> +Converter::getSTypes(nir_alu_instr *insn) +{ + const nir_op_info &info = nir_op_infos[insn->op]; + std::vector<DataType> res(info.num_inputs); + + for (auto i = 0u; i < info.num_inputs; ++i) { + if (info.input_types[i] != nir_type_invalid) { + res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i])); + } else { + ERROR("getSType not implemented for %s idx %u\n", info.name, i); + assert(false); + res[i] = TYPE_NONE; + break; + } + } + + return res; +} + +DataType +Converter::getSType(nir_src &src, bool isFloat, bool isSigned) +{ + NirSSADefBitSize bitSize; + if (src.is_ssa) + bitSize = src.ssa->bit_size; + else + bitSize = src.reg.reg->bit_size; + + return typeOfSize(bitSize / 8, isFloat, isSigned); +} + 
Converter::LValues& Converter::convert(nir_dest *dest) { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v7 03/35] nvir: print the shader type when dumping headers
this makes debugging the shader header a little easier Signed-off-by: Karol Herbst <kher...@redhat.com> Acked-by: Pierre Moreau <pierre.mor...@free.fr> --- src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 9520d984bb3..3a11534df83 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -551,6 +551,7 @@ nvc0_program_dump(struct nvc0_program *prog) unsigned pos; if (prog->type != PIPE_SHADER_COMPUTE) { + debug_printf("dumping HDR for type %i\n", prog->type); for (pos = 0; pos < ARRAY_SIZE(prog->hdr); ++pos) debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n", pos * sizeof(prog->hdr[0]), prog->hdr[pos]); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v7 04/35] nvir: move common converter code in base class
v2: remove TGSI related bits Signed-off-by: Karol Herbst <kher...@redhat.com> Reviewed-by: Pierre Moreau <pierre.mor...@free.fr> --- src/gallium/drivers/nouveau/Makefile.sources | 2 + .../nouveau/codegen/nv50_ir_from_common.cpp| 107 + .../drivers/nouveau/codegen/nv50_ir_from_common.h | 58 +++ .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 106 +--- src/gallium/drivers/nouveau/meson.build| 2 + 5 files changed, 172 insertions(+), 103 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 65f08c7d8d8..fee5e59522e 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -115,6 +115,8 @@ NV50_CODEGEN_SOURCES := \ codegen/nv50_ir_build_util.h \ codegen/nv50_ir_driver.h \ codegen/nv50_ir_emit_nv50.cpp \ + codegen/nv50_ir_from_common.cpp \ + codegen/nv50_ir_from_common.h \ codegen/nv50_ir_from_tgsi.cpp \ codegen/nv50_ir_graph.cpp \ codegen/nv50_ir_graph.h \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp new file mode 100644 index 000..0ad6087e588 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp @@ -0,0 +1,107 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be 
included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "codegen/nv50_ir_from_common.h" + +namespace nv50_ir { + +ConverterCommon::ConverterCommon(Program *prog, nv50_ir_prog_info *info) + : BuildUtil(prog), + info(info) {} + +ConverterCommon::Subroutine * +ConverterCommon::getSubroutine(unsigned ip) +{ + std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip); + + if (it == sub.map.end()) + it = sub.map.insert(std::make_pair( + ip, Subroutine(new Function(prog, "SUB", ip)))).first; + + return &it->second; +} + +ConverterCommon::Subroutine * +ConverterCommon::getSubroutine(Function *f) +{ + unsigned ip = f->getLabel(); + std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip); + + if (it == sub.map.end()) + it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first; + + return &it->second; +} + +uint8_t +ConverterCommon::translateInterpMode(const struct nv50_ir_varying *var, operation& op) +{ + uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; + + if (var->flat) + mode = NV50_IR_INTERP_FLAT; + else + if (var->linear) + mode = NV50_IR_INTERP_LINEAR; + else + if (var->sc) + mode = NV50_IR_INTERP_SC; + + op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) + ? 
OP_PINTERP : OP_LINTERP; + + if (var->centroid) + mode |= NV50_IR_INTERP_CENTROID; + + return mode; +} + +void +ConverterCommon::handleUserClipPlanes() +{ + Value *res[8]; + int n, i, c; + + for (c = 0; c < 4; ++c) { + for (i = 0; i < info->io.genUserClip; ++i) { + Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot, +TYPE_F32, info->io.ucpBase + i * 16 + c * 4); + Value *ucp = mkLoadv(TYPE_F32, sym, NULL); + if (c == 0) +res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); + else +mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]); + } + } + + const int first = info->numOutputs - (info->io.genUserClip + 3) / 4; + + for (i = 0; i < info->io.genUserClip; ++i) { + n = i / 4 + first; + c = i % 4; + Symbol *sym = + mkSymbol(FILE_SHADER_OUTPUT, 0, TYP
[Mesa-dev] [PATCH v7 06/35] nouveau: add support for nir
not all those nir options are actually required, it just made the work a little easier. v2: fix asserts parse compute shaders don't lower bitfield_insert v3: fix memory leak v4: don't lower fmod32 v5: set lower_all_io_to_temps to false fix memory leak because we take over ownership of the nir shader merge: use the lowering helper v6: include TGSI debug header for proper assert call add nv50 support v7: fix Automake build Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/gallium/drivers/nouveau/Automake.inc | 3 + src/gallium/drivers/nouveau/Makefile.am| 5 ++ src/gallium/drivers/nouveau/Makefile.sources | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir.cpp| 3 + src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 76 ++ src/gallium/drivers/nouveau/meson.build| 9 +-- src/gallium/drivers/nouveau/nv50/nv50_program.c| 19 +- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 40 src/gallium/drivers/nouveau/nv50/nv50_state.c | 31 - src/gallium/drivers/nouveau/nvc0/nvc0_program.c| 18 - src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 42 +++- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 27 +++- 13 files changed, 261 insertions(+), 14 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp diff --git a/src/gallium/drivers/nouveau/Automake.inc b/src/gallium/drivers/nouveau/Automake.inc index 1d383fcb7b1..657790494dc 100644 --- a/src/gallium/drivers/nouveau/Automake.inc +++ b/src/gallium/drivers/nouveau/Automake.inc @@ -8,4 +8,7 @@ TARGET_LIB_DEPS += \ $(NOUVEAU_LIBS) \ $(LIBDRM_LIBS) +TARGET_COMPILER_LIB_DEPS = \ + $(top_builddir)/src/compiler/nir/libnir.la + endif diff --git a/src/gallium/drivers/nouveau/Makefile.am b/src/gallium/drivers/nouveau/Makefile.am index f6126b54481..478dfcf437b 100644 --- a/src/gallium/drivers/nouveau/Makefile.am +++ b/src/gallium/drivers/nouveau/Makefile.am @@ -25,6 +25,10 @@ include $(top_srcdir)/src/gallium/Automake.inc AM_CPPFLAGS = \ 
-I$(top_srcdir)/include/drm-uapi \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_srcdir)/src/compiler/nir \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ $(GALLIUM_DRIVER_CFLAGS) \ $(LIBDRM_CFLAGS) \ $(NOUVEAU_CFLAGS) @@ -47,6 +51,7 @@ nouveau_compiler_SOURCES = \ nouveau_compiler_LDADD = \ libnouveau.la \ + $(top_builddir)/src/compiler/nir/libnir.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ $(top_builddir)/src/util/libmesautil.la \ $(GALLIUM_COMMON_LIB_DEPS) diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index ec344c63169..c6a1aff7110 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -117,6 +117,7 @@ NV50_CODEGEN_SOURCES := \ codegen/nv50_ir_emit_nv50.cpp \ codegen/nv50_ir_from_common.cpp \ codegen/nv50_ir_from_common.h \ + codegen/nv50_ir_from_nir.cpp \ codegen/nv50_ir_from_tgsi.cpp \ codegen/nv50_ir_graph.cpp \ codegen/nv50_ir_graph.h \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index c987da99085..b3efef72b0f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1231,6 +1231,9 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) prog->optLevel = info->optLevel; switch (info->bin.sourceRep) { + case PIPE_SHADER_IR_NIR: + ret = prog->makeFromNIR(info) ? 0 : -2; + break; case PIPE_SHADER_IR_TGSI: ret = prog->makeFromTGSI(info) ? 
0 : -2; break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index f4f3c708886..e5b4592a61e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -1255,6 +1255,7 @@ public: inline void del(Function *fn, int& id) { allFuncs.remove(id); } inline void add(Value *rval, int& id) { allRValues.insert(rval, id); } + bool makeFromNIR(struct nv50_ir_prog_info *); bool makeFromTGSI(struct nv50_ir_prog_info *); bool convertToSSA(); bool optimizeSSA(int level); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp new file mode 100644 index 000..b22c62fd434 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -0,0 +1,76 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files
[Mesa-dev] [PATCH v7 00/35] Nouveau NIR support
Changes to v6: * fixed automake build * fixed shader cache collision with TGSI shaders * fixed handling of inot * fixed location of images * fixed load_output of fragment shaders (FBFETCH) * added load_barycentric_sample handling * some preparation for bindless_texture support There will be some follow-up patches to add support for: * 4 constant offsets in TG4 * bindless_texture support for samplers and images Review is important for patches 1-8; all the other patches touch the from_nir file only. Connor Abbott (1): nv50/ir/ra: Fix copying compound for moves Karol Herbst (34): st/glsl_to_nir: run lower_output_reads on !PIPE_CAP_TGSI_CAN_READ_OUTPUTS nvir: print the shader type when dumping headers nvir: move common converter code in base class nvir: add lowering helper nouveau: add support for nir nouveau: add env var to make nir default nouveau: fix nir and TGSI shader cache collision nvir/nir: run some passes to make the conversion easier nvir/nir: track defs and provide easy access functions nvir/nir: add nir type helper functions nvir/nir: run assignSlots nvir/nir: add loadFrom and storeTo helpler nvir/nir: parse NIR shader info nvir/nir: implement CFG handling nvir/nir: implement nir_load_const_instr nvir/nir: add skeleton for nir_intrinsic_instr nvir/nir: implement nir_alu_instr handling nvir/nir: implement nir_intrinsic_load_uniform nvir/nir: implement nir_intrinsic_store_(per_vertex_)output nvir/nir: implement load_(interpolated_)input/output nvir/nir: implement intrinsic_discard(_if) nvir/nir: implement loading system values nvir/nir: implement nir_ssa_undef_instr nvir/nir: implement nir_instr_type_tex nvir/nir: add getOperation for intrinsics nvir/nir: implement vote and ballot nvir/nir: implement variable indexing nvir/nir: implement geometry shader nir_intrinsics nvir/nir: implement nir_intrinsic_load_ubo nvir/nir: implement ssbo intrinsics nvir/nir: implement images nvir/nir: add memory barriers nvir/nir: implement load_per_vertex_output nvir/nir: 
implement intrinsic shader_clock src/gallium/drivers/nouveau/Automake.inc |3 + src/gallium/drivers/nouveau/Makefile.am|5 + src/gallium/drivers/nouveau/Makefile.sources |5 + src/gallium/drivers/nouveau/codegen/nv50_ir.cpp|3 + src/gallium/drivers/nouveau/codegen/nv50_ir.h |1 + .../nouveau/codegen/nv50_ir_from_common.cpp| 107 + .../drivers/nouveau/codegen/nv50_ir_from_common.h | 58 + .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 3145 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 106 +- .../nouveau/codegen/nv50_ir_lowering_helper.cpp| 275 ++ .../nouveau/codegen/nv50_ir_lowering_helper.h | 53 + src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 60 +- src/gallium/drivers/nouveau/meson.build| 13 +- src/gallium/drivers/nouveau/nouveau_screen.c | 11 +- src/gallium/drivers/nouveau/nouveau_screen.h |2 + src/gallium/drivers/nouveau/nv50/nv50_program.c| 19 +- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 44 +- src/gallium/drivers/nouveau/nv50/nv50_state.c | 31 +- src/gallium/drivers/nouveau/nvc0/nvc0_program.c| 19 +- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 61 +- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 27 +- src/mesa/state_tracker/st_glsl_to_nir.cpp |6 + 22 files changed, 3908 insertions(+), 146 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.h -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v7 02/35] nv50/ir/ra: Fix copying compound for moves
From: Connor Abbott <cwabbo...@gmail.com> In order to reduce moves when coalescing multiple registers into a larger register, RA will try to coalesce MERGE instructions with their definitions. For example, for something like this in GLSL: uint a = ...; uint b = ...; uint64 x = packUint2x32(a, b); The compiler will try to coalesce x with a and b, in the same way as something like: uint a = ...; uint b = ...; ... uint x = phi(a, b); with the crucial difference that the definitions of a and b only clobber part of the register, instead of the whole thing. This information is carried through the compound flag and compMask bitmask. If compound is set, then the value has been coalesced in such a way that not all the defs clobber the entire register. The compMask bitmask describes which subregister each def clobbers, although it does it in a slightly convoluted way. It's an invariant that once compound is set on one def, it must be set for all the defs in a given coalesced value. In more detail, the constraints pass will first create extra moves: uint a = ...; uint b = ...; uint a' = a; uint b' = b; uint64 x = packUint2x32(a', b'); and then RA will merge values involved in MERGE/SPLIT instructions, merging x with a' and b' and making the combined value compound -- this is relatively simple, and will always succeed since we just created a' and b', so they never interfere with x, and x has no other definitions, since we haven't started coalescing moves yet. Basically, we just replaced the MERGE instruction with an equivalent sequence of partial writes to the destination. The tricky part comes when we try to merge a' with a and b' with b. We need to transfer the compound information from a' to a and b' to b, which copyCompound() does, but we also need to transfer it to any defs coalesced with a and b, which the code failed to do. 
Similarly, if x is the argument to a phi instruction, then when we try to merge it with other arguments to the same phi by coalescing moves, we'd have problems guaranteeing that all the other merged defs stay up-to-date. One tricky part of fixing this is that in order to properly propagate the information from a' to a, we need to do it before the defs for a and a' are merged in coalesceValues(), since we need to know which defs are merged with a but not a' -- after coalesceValues() returns, all the defs have been combined, so we don't know which is which. I took the approach of calling copyCompound() inside coalesceValues(), instead of afterwards. Cc: Ilia Mirkin <imir...@alum.mit.edu> Cc: Karol Herbst <kher...@redhat.com> Tested-by: Karol Herbst <kher...@redhat.com> Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 60 ++ 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 3a0e56e1385..df3116a6d73 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -890,6 +890,35 @@ GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval) livei.insert(lval->livei); } +// Used when coalescing moves. 
The non-compound value will become one, e.g.: +// mov b32 $r0 $r2/ merge b64 $r0d { $r0 $r1 } +// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d +static inline void copyCompound(Value *dst, Value *src) +{ + LValue *ldst = dst->asLValue(); + LValue *lsrc = src->asLValue(); + + if (ldst->compound && !lsrc->compound) { + LValue *swap = lsrc; + lsrc = ldst; + ldst = swap; + } + + assert(!ldst->compound); + + if (lsrc->compound) { + Value *dstRep = ldst->join; + for (Value::DefIterator d = dstRep->defs.begin(); d != dstRep->defs.end(); + ++d) { + LValue *ldst = (*d)->get()->asLValue(); + if (!ldst->compound) +ldst->compMask = 0xff; + ldst->compound = 1; + ldst->compMask &= lsrc->compMask; + } + } +} + bool GCRA::coalesceValues(Value *dst, Value *src, bool force) { @@ -932,9 +961,16 @@ GCRA::coalesceValues(Value *dst, Value *src, bool force) if (!force && nRep->livei.overlaps(nVal->livei)) return false; + // TODO: Handle this case properly. + if (!force && rep->compound && val->compound) + return false; + INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n", rep->id, rep->reg.data.id, val->id); + if (!force) + copyCompound(dst, src); + // set join pointer of all values joined with val for (Value::DefIterator def = val->defs.begin(); def != val->defs.end(); ++def) @@ -997,24 +1033,6 @@ static inline uint8_t makeCompMask(int compSize, int base, int size) } } -// Used when
[Mesa-dev] [PATCH v7 01/35] st/glsl_to_nir: run lower_output_reads on !PIPE_CAP_TGSI_CAN_READ_OUTPUTS
this is required for Drivers which don't allow reading from outputs. Reviewed-by: Timothy Arceri <tarc...@itsqueeze.com> Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/mesa/state_tracker/st_glsl_to_nir.cpp | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index bcf6a7ceb6a..6502aec370f 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -44,6 +44,7 @@ #include "compiler/glsl_types.h" #include "compiler/glsl/glsl_to_nir.h" #include "compiler/glsl/ir.h" +#include "compiler/glsl/ir_optimization.h" #include "compiler/glsl/string_to_uint_map.h" @@ -553,6 +554,7 @@ st_nir_get_mesa_program(struct gl_context *ctx, struct gl_linked_shader *shader) { struct st_context *st = st_context(ctx); + struct pipe_screen *pscreen = ctx->st->pipe->screen; struct gl_program *prog; validate_ir_tree(shader->ir); @@ -565,6 +567,10 @@ st_nir_get_mesa_program(struct gl_context *ctx, _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader, prog->Parameters); + /* Remove reads from output registers. */ + if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS)) + lower_output_reads(shader->Stage, shader->ir); + if (ctx->_Shader->Flags & GLSL_DUMP) { _mesa_log("\n"); _mesa_log("GLSL IR for linked %s program %d:\n", -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v7 07/35] nouveau: add env var to make nir default
v2: allow for non debug builds as well v3: move reading out env var more global disable tg4 with multiple offsets with nir disable caps for 64 bit types v6: nv50 support disable MS images disable bindless textures Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/gallium/drivers/nouveau/nouveau_screen.c | 5 + src/gallium/drivers/nouveau/nouveau_screen.h | 2 ++ src/gallium/drivers/nouveau/nv50/nv50_screen.c | 4 +++- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 19 +-- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index c144b39b2dd..2598c78a45b 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -176,9 +176,14 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) union nouveau_bo_config mm_config; char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG"); + char *use_nir = getenv("NV50_PROG_USE_NIR"); + if (nv_dbg) nouveau_mesa_debug = atoi(nv_dbg); + if (use_nir) + screen->prefer_nir = strtol(use_nir, NULL, 0) == 1; + /* These must be set before any failure is possible, as the cleanup * paths assume they're responsible for deleting them. 
*/ diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index e4fbae99ca4..1229b66b26f 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -62,6 +62,8 @@ struct nouveau_screen { struct disk_cache *disk_shader_cache; + bool prefer_nir; + #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS union { uint64_t v[29]; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index e91ea8d08c1..6f0a30ea026 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -311,6 +311,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader, enum pipe_shader_cap param) { + const struct nouveau_screen *screen = nouveau_screen(pscreen); + switch (shader) { case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: @@ -364,7 +366,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: return MIN2(16, PIPE_MAX_SAMPLERS); case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_TGSI; + return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 15662093eb6..1f558aeaf4b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -90,9 +90,11 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, if (bindings & PIPE_BIND_SHADER_IMAGE) { if (sample_count > 0 && - nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS) { + (nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS || + nouveau_screen(pscreen)->prefer_nir)) { /* MS images are currently unsupported on Maxwell because they have to * be handled explicitly. 
*/ + /* MS images are currently unsupported with NIR */ return false; } @@ -112,7 +114,8 @@ static int nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; - struct nouveau_device *dev = nouveau_screen(pscreen)->device; + const struct nouveau_screen *screen = nouveau_screen(pscreen); + struct nouveau_device *dev = screen->device; switch (param) { /* non-boolean caps */ @@ -216,7 +219,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: - case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: @@ -257,6 +259,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_QUERY_SO_OVERFLOW: return 1; + case PIPE_CAP_TEXTURE_GATHER_OFFSETS: + /* TODO: nir doesn't support tg4 with multiple offsets */ + return screen->prefer_nir ? 0 : 1; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0; case PIPE_CAP_TGSI_
Re: [Mesa-dev] [PATCH v2 2/3] nir: add support for bindless_texture samplers
On Thu, Apr 12, 2018 at 6:33 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > On Thu, Apr 12, 2018 at 7:36 AM, Karol Herbst <kher...@redhat.com> wrote: >> >> On Tue, Apr 10, 2018 at 5:10 PM, Jason Ekstrand <ja...@jlekstrand.net> >> wrote: >> > On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst <kher...@redhat.com> >> > wrote: >> >> >> >> v2: add both texture and sampler handles >> >> >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> >> --- >> >> src/compiler/glsl/glsl_to_nir.cpp | 17 +++-- >> >> src/compiler/nir/nir.h| 2 ++ >> >> src/compiler/nir/nir_print.c | 6 ++ >> >> 3 files changed, 23 insertions(+), 2 deletions(-) >> >> >> >> diff --git a/src/compiler/glsl/glsl_to_nir.cpp >> >> b/src/compiler/glsl/glsl_to_nir.cpp >> >> index dbb58d82e8f..9f233637306 100644 >> >> --- a/src/compiler/glsl/glsl_to_nir.cpp >> >> +++ b/src/compiler/glsl/glsl_to_nir.cpp >> >> @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir) >> >> { >> >> unsigned num_srcs; >> >> nir_texop op; >> >> + bool bindless = >> >> ir->sampler->variable_referenced()->contains_bindless(); >> > >> > >> > What happens if I have a uniform struct containing both a regular >> > sampler >> > and a bindless sampler? I think this should be possible. >> > >> >> well currently mesa just fails to compile, but even if it would I >> don't see a way how we know with a ir_dereference if we reference a >> bindless or bound sampler. >> >> The glsl_type doesn't tell us either and maybe it makes sense to add a >> is_bindless method to glsl_type so that we can use it in places like >> here? ir->sampler->type gives me the sampler type, but lacks the >> information if it is bindless or not. Any thoughts? > > > That seems like it's probably reasonable. I'm not sure if we really want > different types. Another option would be to handle it as a layout qualifier > on the structure type fields. I'm not sure which is better. 
> I think we should add a field and add a is_opaque method to fix glsl_type::contains_opaque, which is also broken, but we could do that with a new type as well :( >> >> >> >> >> + >> >> switch (ir->op) { >> >> case ir_tex: >> >>op = nir_texop_tex; >> >> @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir) >> >>num_srcs++; >> >> if (ir->offset != NULL) >> >>num_srcs++; >> >> + if (bindless) >> >> + num_srcs++; >> >> >> >> nir_tex_instr *instr = nir_tex_instr_create(this->shader, >> >> num_srcs); >> >> >> >> @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir) >> >>unreachable("not reached"); >> >> } >> >> >> >> - instr->texture = evaluate_deref(>instr, ir->sampler); >> >> - >> >> unsigned src_number = 0; >> >> >> >> + /* for bindless we use the texture handle src */ >> >> + if (bindless) { >> >> + instr->texture = NULL; >> >> + instr->src[src_number].src = >> >> + nir_src_for_ssa(evaluate_rvalue(ir->sampler)); >> >> + instr->src[src_number].src_type = nir_tex_src_texture_handle; >> >> + src_number++; >> >> + } else { >> >> + instr->texture = evaluate_deref(>instr, ir->sampler); >> >> + } >> >> + >> >> if (ir->coordinate != NULL) { >> >>instr->coord_components = ir->coordinate->type->vector_elements; >> >>instr->src[src_number].src = >> >> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h >> >> index f33049d7134..e395352f89c 100644 >> >> --- a/src/compiler/nir/nir.h >> >> +++ b/src/compiler/nir/nir.h >> >> @@ -1218,6 +1218,8 @@ typedef enum { >> >> nir_tex_src_texture_offset, /* < dynamically uniform indirect >> >> offset >> >> */ >> >> nir_tex_src_sampler_offset, /* < dynamically uniform indirect >> >> offset >> >> */ >> >> nir_tex_src_plane, /* < selects plane for planar textures >> >> */ >> >> + nir_tex_src_texture_handle, /* < handle for bindless texture */ >> >> + nir_tex_src_sampler_handle, /* < handle for bindless sampler */ >> >> nir_num_tex_src_types >> >> } nir_tex_src_type; >> >> >> >> diff --git a/src/compiler/nir/nir_print.c >> >> 
b/src/compiler/nir/nir_print.c >> >> index 21f13097651..52f20b1eb10 100644 >> >> --- a/src/compiler/nir/nir_print.c >> >> +++ b/src/compiler/nir/nir_print.c >> >> @@ -778,6 +778,12 @@ print_tex_instr(nir_tex_instr *instr, print_state >> >> *state) >> >>case nir_tex_src_plane: >> >> fprintf(fp, "(plane)"); >> >> break; >> >> + case nir_tex_src_texture_handle: >> >> + fprintf(fp, "(texture_handle)"); >> >> + break; >> >> + case nir_tex_src_sampler_handle: >> >> + fprintf(fp, "(sampler_handle)"); >> >> + break; >> >> >> >>default: >> >> unreachable("Invalid texture source type"); >> >> -- >> >> 2.14.3 >> >> >> > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 2/3] nir: add support for bindless_texture samplers
On Tue, Apr 10, 2018 at 5:10 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst <kher...@redhat.com> wrote: >> >> v2: add both texture and sampler handles >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> --- >> src/compiler/glsl/glsl_to_nir.cpp | 17 +++-- >> src/compiler/nir/nir.h| 2 ++ >> src/compiler/nir/nir_print.c | 6 ++ >> 3 files changed, 23 insertions(+), 2 deletions(-) >> >> diff --git a/src/compiler/glsl/glsl_to_nir.cpp >> b/src/compiler/glsl/glsl_to_nir.cpp >> index dbb58d82e8f..9f233637306 100644 >> --- a/src/compiler/glsl/glsl_to_nir.cpp >> +++ b/src/compiler/glsl/glsl_to_nir.cpp >> @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir) >> { >> unsigned num_srcs; >> nir_texop op; >> + bool bindless = >> ir->sampler->variable_referenced()->contains_bindless(); > > > What happens if I have a uniform struct containing both a regular sampler > and a bindless sampler? I think this should be possible. > well currently mesa just fails to compile, but even if it would I don't see a way how we know with a ir_dereference if we reference a bindless or bound sampler. The glsl_type doesn't tell us either and maybe it makes sense to add a is_bindless method to glsl_type so that we can use it in places like here? ir->sampler->type gives me the sampler type, but lacks the information if it is bindless or not. Any thoughts? 
>> >> + >> switch (ir->op) { >> case ir_tex: >>op = nir_texop_tex; >> @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir) >>num_srcs++; >> if (ir->offset != NULL) >>num_srcs++; >> + if (bindless) >> + num_srcs++; >> >> nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); >> >> @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir) >>unreachable("not reached"); >> } >> >> - instr->texture = evaluate_deref(>instr, ir->sampler); >> - >> unsigned src_number = 0; >> >> + /* for bindless we use the texture handle src */ >> + if (bindless) { >> + instr->texture = NULL; >> + instr->src[src_number].src = >> + nir_src_for_ssa(evaluate_rvalue(ir->sampler)); >> + instr->src[src_number].src_type = nir_tex_src_texture_handle; >> + src_number++; >> + } else { >> + instr->texture = evaluate_deref(>instr, ir->sampler); >> + } >> + >> if (ir->coordinate != NULL) { >>instr->coord_components = ir->coordinate->type->vector_elements; >>instr->src[src_number].src = >> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h >> index f33049d7134..e395352f89c 100644 >> --- a/src/compiler/nir/nir.h >> +++ b/src/compiler/nir/nir.h >> @@ -1218,6 +1218,8 @@ typedef enum { >> nir_tex_src_texture_offset, /* < dynamically uniform indirect offset >> */ >> nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset >> */ >> nir_tex_src_plane, /* < selects plane for planar textures */ >> + nir_tex_src_texture_handle, /* < handle for bindless texture */ >> + nir_tex_src_sampler_handle, /* < handle for bindless sampler */ >> nir_num_tex_src_types >> } nir_tex_src_type; >> >> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c >> index 21f13097651..52f20b1eb10 100644 >> --- a/src/compiler/nir/nir_print.c >> +++ b/src/compiler/nir/nir_print.c >> @@ -778,6 +778,12 @@ print_tex_instr(nir_tex_instr *instr, print_state >> *state) >>case nir_tex_src_plane: >> fprintf(fp, "(plane)"); >> break; >> + case nir_tex_src_texture_handle: >> + fprintf(fp, 
"(texture_handle)"); >> + break; >> + case nir_tex_src_sampler_handle: >> + fprintf(fp, "(sampler_handle)"); >> + break; >> >>default: >> unreachable("Invalid texture source type"); >> -- >> 2.14.3 >> > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars
On Tue, Apr 10, 2018 at 6:01 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > On Tue, Apr 10, 2018 at 8:35 AM, Karol Herbst <kher...@redhat.com> wrote: >> >> On Tue, Apr 10, 2018 at 5:12 PM, Jason Ekstrand <ja...@jlekstrand.net> >> wrote: >> > I still don't see anything to make nir_validate not fail out on you if >> > it >> > sees a read or a write to/from an IMAGE or SAMPLER. >> > >> >> what kind of glsl code are you talking about here? I wrote some tests >> and things just seem to work out. I wasn't able to hit any other >> issues. > > > Were they tests where GLSL was able to copy propagate such that NIR never > saw a write to the image/sampler variable? > Well the trivial one is where you directly consume the uniform. >> >> > On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst <kher...@redhat.com> >> > wrote: >> >> >> >> v2: fix assertion for bindless to non bindless assignments >> >> >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> >> --- >> >> src/compiler/nir/nir_split_var_copies.c | 8 +++- >> >> 1 file changed, 7 insertions(+), 1 deletion(-) >> >> >> >> diff --git a/src/compiler/nir/nir_split_var_copies.c >> >> b/src/compiler/nir/nir_split_var_copies.c >> >> index bc3ceedbdb8..e592754d770 100644 >> >> --- a/src/compiler/nir/nir_split_var_copies.c >> >> +++ b/src/compiler/nir/nir_split_var_copies.c >> >> @@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct >> >> split_var_copies_state *state) >> >>nir_deref_var *src_head = intrinsic->variables[1]; >> >>nir_deref *dest_tail = nir_deref_tail(_head->deref); >> >>nir_deref *src_tail = nir_deref_tail(_head->deref); >> >> + enum glsl_base_type base_type = >> >> glsl_get_base_type(src_tail->type); >> >> >> >> - switch (glsl_get_base_type(src_tail->type)) { >> >> + switch (base_type) { >> >>case GLSL_TYPE_ARRAY: >> >>case GLSL_TYPE_STRUCT: >> >> split_var_copy_instr(intrinsic, dest_head, src_head, >> >> @@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct >> >> 
split_var_copies_state *state) >> >> ralloc_steal(state->dead_ctx, instr); >> >> } >> >> break; >> >> + /* for bindless those are uint64 */ >> >> + case GLSL_TYPE_IMAGE: >> >> + case GLSL_TYPE_SAMPLER: >> >> + assert(src_head->var->data.bindless || >> >> +glsl_get_base_type(src_head->var->type) == base_type); >> >>case GLSL_TYPE_INT: >> >>case GLSL_TYPE_UINT: >> >>case GLSL_TYPE_INT16: >> >> -- >> >> 2.14.3 >> >> >> > >> > >> > ___ >> > mesa-dev mailing list >> > mesa-dev@lists.freedesktop.org >> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 3/3] glsl/nir: fix variable type for image intrinsics and ubos
On Tue, Apr 10, 2018 at 5:11 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst <kher...@redhat.com> wrote: >> >> If the bindless image is passed through a struct we ended up getting the >> glsl_type of the struct, not the image. >> >> variable_referenced points to the declaration of the struct, so it won't >> work >> for bindless images. So just drop it. >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> --- >> src/compiler/glsl/glsl_to_nir.cpp | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/src/compiler/glsl/glsl_to_nir.cpp >> b/src/compiler/glsl/glsl_to_nir.cpp >> index 9f233637306..bb9ba3af04a 100644 >> --- a/src/compiler/glsl/glsl_to_nir.cpp >> +++ b/src/compiler/glsl/glsl_to_nir.cpp >> @@ -893,7 +893,7 @@ nir_visitor::visit(ir_call *ir) >> exec_node *param = ir->actual_parameters.get_head(); >> ir_dereference *image = (ir_dereference *)param; >> const glsl_type *type = >> -image->variable_referenced()->type->without_array(); >> +image->type->without_array(); > > > I asked this question on the last version as well: Do we really need > without_array()? > I don't think so actually, because it should be the sampler type already. I just forgot about that. >> >> instr->variables[0] = evaluate_deref(>instr, image); >> param = param->get_next(); >> -- >> 2.14.3 >> > > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars
On Tue, Apr 10, 2018 at 5:12 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > I still don't see anything to make nir_validate not fail out on you if it > sees a read or a write to/from an IMAGE or SAMPLER. > what kind of glsl code are you talking about here? I wrote some tests and things just seem to work out. I wasn't able to hit any other issues. > On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst <kher...@redhat.com> wrote: >> >> v2: fix assertion for bindless to non bindless assignments >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> --- >> src/compiler/nir/nir_split_var_copies.c | 8 +++- >> 1 file changed, 7 insertions(+), 1 deletion(-) >> >> diff --git a/src/compiler/nir/nir_split_var_copies.c >> b/src/compiler/nir/nir_split_var_copies.c >> index bc3ceedbdb8..e592754d770 100644 >> --- a/src/compiler/nir/nir_split_var_copies.c >> +++ b/src/compiler/nir/nir_split_var_copies.c >> @@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct >> split_var_copies_state *state) >>nir_deref_var *src_head = intrinsic->variables[1]; >>nir_deref *dest_tail = nir_deref_tail(_head->deref); >>nir_deref *src_tail = nir_deref_tail(_head->deref); >> + enum glsl_base_type base_type = glsl_get_base_type(src_tail->type); >> >> - switch (glsl_get_base_type(src_tail->type)) { >> + switch (base_type) { >>case GLSL_TYPE_ARRAY: >>case GLSL_TYPE_STRUCT: >> split_var_copy_instr(intrinsic, dest_head, src_head, >> @@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct >> split_var_copies_state *state) >> ralloc_steal(state->dead_ctx, instr); >> } >> break; >> + /* for bindless those are uint64 */ >> + case GLSL_TYPE_IMAGE: >> + case GLSL_TYPE_SAMPLER: >> + assert(src_head->var->data.bindless || >> +glsl_get_base_type(src_head->var->type) == base_type); >>case GLSL_TYPE_INT: >>case GLSL_TYPE_UINT: >>case GLSL_TYPE_INT16: >> -- >> 2.14.3 >> > > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 2/3] nir: add support for bindless_texture samplers
v2: add both texture and sampler handles Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/glsl/glsl_to_nir.cpp | 17 +++-- src/compiler/nir/nir.h| 2 ++ src/compiler/nir/nir_print.c | 6 ++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index dbb58d82e8f..9f233637306 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir) { unsigned num_srcs; nir_texop op; + bool bindless = ir->sampler->variable_referenced()->contains_bindless(); + switch (ir->op) { case ir_tex: op = nir_texop_tex; @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir) num_srcs++; if (ir->offset != NULL) num_srcs++; + if (bindless) + num_srcs++; nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir) unreachable("not reached"); } - instr->texture = evaluate_deref(>instr, ir->sampler); - unsigned src_number = 0; + /* for bindless we use the texture handle src */ + if (bindless) { + instr->texture = NULL; + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->sampler)); + instr->src[src_number].src_type = nir_tex_src_texture_handle; + src_number++; + } else { + instr->texture = evaluate_deref(>instr, ir->sampler); + } + if (ir->coordinate != NULL) { instr->coord_components = ir->coordinate->type->vector_elements; instr->src[src_number].src = diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index f33049d7134..e395352f89c 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1218,6 +1218,8 @@ typedef enum { nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */ nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ nir_tex_src_plane, /* < selects plane for planar textures */ + nir_tex_src_texture_handle, /* < handle for bindless texture */ + nir_tex_src_sampler_handle, /* 
< handle for bindless sampler */ nir_num_tex_src_types } nir_tex_src_type; diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 21f13097651..52f20b1eb10 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -778,6 +778,12 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) case nir_tex_src_plane: fprintf(fp, "(plane)"); break; + case nir_tex_src_texture_handle: + fprintf(fp, "(texture_handle)"); + break; + case nir_tex_src_sampler_handle: + fprintf(fp, "(sampler_handle)"); + break; default: unreachable("Invalid texture source type"); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 3/3] glsl/nir: fix variable type for image intrinsics and ubos
If the bindless image is passed through a struct we ended up getting the glsl_type of the struct, not the image. variable_referenced points to the declaration of the struct, so it won't work for bindless images. So just drop it. Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/glsl/glsl_to_nir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 9f233637306..bb9ba3af04a 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -893,7 +893,7 @@ nir_visitor::visit(ir_call *ir) exec_node *param = ir->actual_parameters.get_head(); ir_dereference *image = (ir_dereference *)param; const glsl_type *type = -image->variable_referenced()->type->without_array(); +image->type->without_array(); instr->variables[0] = evaluate_deref(>instr, image); param = param->get_next(); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars
v2: fix assertion for bindless to non bindless assignments Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/nir/nir_split_var_copies.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_split_var_copies.c b/src/compiler/nir/nir_split_var_copies.c index bc3ceedbdb8..e592754d770 100644 --- a/src/compiler/nir/nir_split_var_copies.c +++ b/src/compiler/nir/nir_split_var_copies.c @@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct split_var_copies_state *state) nir_deref_var *src_head = intrinsic->variables[1]; nir_deref *dest_tail = nir_deref_tail(_head->deref); nir_deref *src_tail = nir_deref_tail(_head->deref); + enum glsl_base_type base_type = glsl_get_base_type(src_tail->type); - switch (glsl_get_base_type(src_tail->type)) { + switch (base_type) { case GLSL_TYPE_ARRAY: case GLSL_TYPE_STRUCT: split_var_copy_instr(intrinsic, dest_head, src_head, @@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct split_var_copies_state *state) ralloc_steal(state->dead_ctx, instr); } break; + /* for bindless those are uint64 */ + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_SAMPLER: + assert(src_head->var->data.bindless || +glsl_get_base_type(src_head->var->type) == base_type); case GLSL_TYPE_INT: case GLSL_TYPE_UINT: case GLSL_TYPE_INT16: -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 0/3] nir: add support for ARB_bindless_texture texture handles
With this it should be possible to add support for texture handles for backends using NIR. Changes since v1: * dropped patch for image handles, still need to work on that Karol Herbst (3): nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars nir: add support for bindless_texture samplers glsl/nir: fix variable type for image intrinsics and ubos src/compiler/glsl/glsl_to_nir.cpp | 19 --- src/compiler/nir/nir.h | 2 ++ src/compiler/nir/nir_print.c| 6 ++ src/compiler/nir/nir_split_var_copies.c | 8 +++- 4 files changed, 31 insertions(+), 4 deletions(-) -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] glsl: properly handle bindless sampler and image parameters
fixes a piglit test I sent to the list: spec@arb_bindless_texture@execution@samplers@basic-arithmetic-func-call-uvec2-texture2D Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/glsl/opt_function_inlining.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/glsl/opt_function_inlining.cpp b/src/compiler/glsl/opt_function_inlining.cpp index 04690b6cf45..3d00074bbc3 100644 --- a/src/compiler/glsl/opt_function_inlining.cpp +++ b/src/compiler/glsl/opt_function_inlining.cpp @@ -155,7 +155,7 @@ ir_call::generate_inline(ir_instruction *next_ir) ir_rvalue *param = (ir_rvalue *) actual_node; /* Generate a new variable for the parameter. */ - if (sig_param->type->contains_opaque()) { + if (!sig_param->contains_bindless() && sig_param->type->contains_opaque()) { /* For opaque types, we want the inlined variable references * referencing the passed in variable, since that will have * the location information, which an assignment of an opaque -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] nv50/ir: make a copy of tex src if it's referenced multiple times
I guess this fixes a bug somewhere? On Tue, Apr 10, 2018 at 6:11 AM, Ilia Mirkin wrote: > For nv50 we coalesce the srcs and defs into a single node. As such, we > can end up with impossible constraints if the source is referenced > after the tex operation (which, due to the coalescing of values, will > have overwritten it). > > This logic already exists for inserting moves for MERGE/UNION sources. > It's the exact same idea here, so leverage that code, which also > includes a few optimizations around not extending live ranges > unnecessarily. > > Signed-off-by: Ilia Mirkin > --- > > v1 -> v2: make use of existing logic in insertConstraintMoves > > src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 86 > -- > 1 file changed, 49 insertions(+), 37 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > index 3a0e56e1385..7d107aca68d 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > @@ -257,6 +257,7 @@ private: > private: >virtual bool visit(BasicBlock *); > > + void insertConstraintMove(Instruction *, int s); >bool insertConstraintMoves(); > >void condenseDefs(Instruction *); > @@ -2216,6 +2217,8 @@ > RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex) > for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) { >if (!tex->srcExists(c)) > tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue())); > + else > + insertConstraintMove(tex, c); >if (!tex->defExists(c)) > tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue())); > } > @@ -2288,6 +2291,51 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) > return true; > } > > +void > +RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int > s) > +{ > + const uint8_t size = cst->src(s).getSize(); > + > + assert(cst->getSrc(s)->defs.size() == 1); // still SSA > + > + Instruction *defi = 
cst->getSrc(s)->defs.front()->getInsn(); > + bool imm = defi->op == OP_MOV && > + defi->src(0).getFile() == FILE_IMMEDIATE; > + bool load = defi->op == OP_LOAD && > + defi->src(0).getFile() == FILE_MEMORY_CONST && > + !defi->src(0).isIndirect(0); > + // catch some cases where don't really need MOVs > + if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) { > + if (imm || load) { > + // Move the defi right before the cst. No point in expanding > + // the range. > + defi->bb->remove(defi); > + cst->bb->insertBefore(cst, defi); > + } > + return; > + } > + > + LValue *lval = new_LValue(func, cst->src(s).getFile()); > + lval->reg.size = size; > + > + Instruction *mov = new_Instruction(func, OP_MOV, typeOfSize(size)); > + mov->setDef(0, lval); > + mov->setSrc(0, cst->getSrc(s)); > + > + if (load) { > + mov->op = OP_LOAD; > + mov->setSrc(0, defi->getSrc(0)); > + } else if (imm) { > + mov->setSrc(0, defi->getSrc(0)); > + } > + > + if (defi->getPredicate()) > + mov->setPredicate(defi->cc, defi->getPredicate()); > + > + cst->setSrc(s, mov->getDef(0)); > + cst->bb->insertBefore(cst, mov); > +} > + > // Insert extra moves so that, if multiple register constraints on a value > are > // in conflict, these conflicts can be resolved. > bool > @@ -2328,46 +2376,10 @@ > RegAlloc::InsertConstraintsPass::insertConstraintMoves() > cst->bb->insertBefore(cst, mov); > continue; > } > -assert(cst->getSrc(s)->defs.size() == 1); // still SSA > - > -Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); > -bool imm = defi->op == OP_MOV && > - defi->src(0).getFile() == FILE_IMMEDIATE; > -bool load = defi->op == OP_LOAD && > - defi->src(0).getFile() == FILE_MEMORY_CONST && > - !defi->src(0).isIndirect(0); > -// catch some cases where don't really need MOVs > -if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) > { > - if (imm || load) { > - // Move the defi right before the cst. No point in > expanding > - // the range. 
> - defi->bb->remove(defi); > - cst->bb->insertBefore(cst, defi); > - } > - continue; > -} > > -LValue *lval = new_LValue(func, cst->src(s).getFile()); > -lval->reg.size = size; > - > -mov = new_Instruction(func, OP_MOV, typeOfSize(size)); > -mov->setDef(0, lval); > -mov->setSrc(0, cst->getSrc(s)); > - > -if (load) { > - mov->op = OP_LOAD; > - mov->setSrc(0, defi->getSrc(0)); > -}
Re: [Mesa-dev] [PATCH] RFC gallium: add 64 bit integer formats
On Tue, Apr 10, 2018 at 2:43 AM, Ilia Mirkin <imir...@alum.mit.edu> wrote: > On Mon, Apr 9, 2018 at 8:39 PM, Karol Herbst <kher...@redhat.com> wrote: >> unsigneds are needed by ARB_bindless_texture 64 bit vertex attribs, both for >> NV_vertex_attrib_integer64. >> >> Fixes the new piglit sampler-vertex-attrib-input-output test I sent some days >> ago for bindless_texture. >> >> The change inside vbo_attrtype_to_double_flag is what I am most concerned >> about. Maybe I should add another flag for 64 bit ints. Or rework what >> Doubles >> mean in gl_array_attributes. Or Rename that to is64Bit and rework all users >> of >> Doubles. >> >> Any suggestions? >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> --- >> src/gallium/drivers/svga/svga_format.c | 8 >> src/gallium/include/pipe/p_format.h| 9 + >> src/mesa/main/glformats.c | 3 +++ >> src/mesa/state_tracker/st_atom_array.c | 30 +++--- >> src/mesa/vbo/vbo_private.h | 2 +- >> 5 files changed, 48 insertions(+), 4 deletions(-) >> >> diff --git a/src/gallium/drivers/svga/svga_format.c >> b/src/gallium/drivers/svga/svga_format.c >> index 20a6e6b159f..f01a0e79c72 100644 >> --- a/src/gallium/drivers/svga/svga_format.c >> +++ b/src/gallium/drivers/svga/svga_format.c >> @@ -369,6 +369,14 @@ static const struct vgpu10_format_entry >> format_conversion_table[] = >> { PIPE_FORMAT_A1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID, >> SVGA3D_FORMAT_INVALID, 0 }, >> { PIPE_FORMAT_X1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID, >> SVGA3D_FORMAT_INVALID, 0 }, >> { PIPE_FORMAT_A4B4G4R4_UNORM,SVGA3D_FORMAT_INVALID, >> SVGA3D_FORMAT_INVALID, 0 }, >> + { PIPE_FORMAT_R64_UINT, SVGA3D_FORMAT_INVALID, >> SVGA3D_FORMAT_INVALID, 0 }, >> + { PIPE_FORMAT_R64G64_UINT, SVGA3D_FORMAT_INVALID, >> SVGA3D_FORMAT_INVALID, 0 }, >> + { PIPE_FORMAT_R64G64B64_UINT,SVGA3D_FORMAT_INVALID, >> SVGA3D_FORMAT_INVALID, 0 }, >> + { PIPE_FORMAT_R64G64B64A64_UINT, SVGA3D_FORMAT_INVALID, >> SVGA3D_FORMAT_INVALID, 0 }, >> + { PIPE_FORMAT_R64_SINT, SVGA3D_FORMAT_INVALID, >> 
SVGA3D_FORMAT_INVALID, 0 }, >> + { PIPE_FORMAT_R64G64_SINT, SVGA3D_FORMAT_INVALID, >> SVGA3D_FORMAT_INVALID, 0 }, >> + { PIPE_FORMAT_R64G64B64_SINT,SVGA3D_FORMAT_INVALID, >> SVGA3D_FORMAT_INVALID, 0 }, >> + { PIPE_FORMAT_R64G64B64A64_SINT, SVGA3D_FORMAT_INVALID, >> SVGA3D_FORMAT_INVALID, 0 }, >> }; >> >> >> diff --git a/src/gallium/include/pipe/p_format.h >> b/src/gallium/include/pipe/p_format.h >> index 57399800fa4..df698856b70 100644 >> --- a/src/gallium/include/pipe/p_format.h >> +++ b/src/gallium/include/pipe/p_format.h >> @@ -396,6 +396,15 @@ enum pipe_format { >> PIPE_FORMAT_X1B5G5R5_UNORM = 310, >> PIPE_FORMAT_A4B4G4R4_UNORM = 311, >> >> + PIPE_FORMAT_R64_UINT= 312, >> + PIPE_FORMAT_R64G64_UINT = 313, >> + PIPE_FORMAT_R64G64B64_UINT = 314, >> + PIPE_FORMAT_R64G64B64A64_UINT = 315, >> + PIPE_FORMAT_R64_SINT= 316, >> + PIPE_FORMAT_R64G64_SINT = 317, >> + PIPE_FORMAT_R64G64B64_SINT = 318, >> + PIPE_FORMAT_R64G64B64A64_SINT = 319, >> + >> PIPE_FORMAT_COUNT >> }; >> >> diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c >> index 1e797c24c2a..feafd97f5ee 100644 >> --- a/src/mesa/main/glformats.c >> +++ b/src/mesa/main/glformats.c >> @@ -543,6 +543,9 @@ _mesa_bytes_per_vertex_attrib(GLint comps, GLenum type) >> case GL_INT: >> case GL_UNSIGNED_INT: >>return comps * sizeof(GLint); >> + /* ARB_bindless_texture */ >> + case GL_UNSIGNED_INT64_ARB: >> + return comps * sizeof(GLuint64EXT); >> case GL_FLOAT: >>return comps * sizeof(GLfloat); >> case GL_HALF_FLOAT_ARB: >> diff --git a/src/mesa/state_tracker/st_atom_array.c >> b/src/mesa/state_tracker/st_atom_array.c >> index 2fd67e8d840..1c3f677d4bf 100644 >> --- a/src/mesa/state_tracker/st_atom_array.c >> +++ b/src/mesa/state_tracker/st_atom_array.c >> @@ -230,6 +230,27 @@ static const uint16_t vertex_formats[][4][4] = { >> PIPE_FORMAT_R32G32B32A32_FIXED >>},
[Mesa-dev] [PATCH] RFC gallium: add 64 bit integer formats
The unsigned formats are needed for ARB_bindless_texture 64-bit vertex attribs, and both the signed and unsigned formats are needed for NV_vertex_attrib_integer64. Fixes the new piglit sampler-vertex-attrib-input-output test I sent some days ago for bindless_texture. The change inside vbo_attrtype_to_double_flag is what I am most concerned about. Maybe I should add another flag for 64 bit ints. Or rework what Doubles means in gl_array_attributes. Or rename that to is64Bit and rework all users of Doubles. Any suggestions? Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/gallium/drivers/svga/svga_format.c | 8 src/gallium/include/pipe/p_format.h| 9 + src/mesa/main/glformats.c | 3 +++ src/mesa/state_tracker/st_atom_array.c | 30 +++--- src/mesa/vbo/vbo_private.h | 2 +- 5 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/svga/svga_format.c b/src/gallium/drivers/svga/svga_format.c index 20a6e6b159f..f01a0e79c72 100644 --- a/src/gallium/drivers/svga/svga_format.c +++ b/src/gallium/drivers/svga/svga_format.c @@ -369,6 +369,14 @@ static const struct vgpu10_format_entry format_conversion_table[] = { PIPE_FORMAT_A1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_X1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_A4B4G4R4_UNORM,SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64G64_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64G64B64_UINT,SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64G64B64A64_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64G64_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64G64B64_SINT,SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R64G64B64A64_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, }; diff --git 
a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 57399800fa4..df698856b70 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -396,6 +396,15 @@ enum pipe_format { PIPE_FORMAT_X1B5G5R5_UNORM = 310, PIPE_FORMAT_A4B4G4R4_UNORM = 311, + PIPE_FORMAT_R64_UINT= 312, + PIPE_FORMAT_R64G64_UINT = 313, + PIPE_FORMAT_R64G64B64_UINT = 314, + PIPE_FORMAT_R64G64B64A64_UINT = 315, + PIPE_FORMAT_R64_SINT= 316, + PIPE_FORMAT_R64G64_SINT = 317, + PIPE_FORMAT_R64G64B64_SINT = 318, + PIPE_FORMAT_R64G64B64A64_SINT = 319, + PIPE_FORMAT_COUNT }; diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index 1e797c24c2a..feafd97f5ee 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -543,6 +543,9 @@ _mesa_bytes_per_vertex_attrib(GLint comps, GLenum type) case GL_INT: case GL_UNSIGNED_INT: return comps * sizeof(GLint); + /* ARB_bindless_texture */ + case GL_UNSIGNED_INT64_ARB: + return comps * sizeof(GLuint64EXT); case GL_FLOAT: return comps * sizeof(GLfloat); case GL_HALF_FLOAT_ARB: diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c index 2fd67e8d840..1c3f677d4bf 100644 --- a/src/mesa/state_tracker/st_atom_array.c +++ b/src/mesa/state_tracker/st_atom_array.c @@ -230,6 +230,27 @@ static const uint16_t vertex_formats[][4][4] = { PIPE_FORMAT_R32G32B32A32_FIXED }, }, + {{0}}, /* gap */ + { /* GL_INT64_ARB */ + {0}, + {0}, + { + PIPE_FORMAT_R64_SINT, + PIPE_FORMAT_R64G64_SINT, + PIPE_FORMAT_R64G64B64_SINT, + PIPE_FORMAT_R64G64B64A64_SINT + }, + }, + { /* GL_UNSIGNED_INT64_ARB */ + {0}, + {0}, + { + PIPE_FORMAT_R64_UINT, + PIPE_FORMAT_R64G64_UINT, + PIPE_FORMAT_R64G64B64_UINT, + PIPE_FORMAT_R64G64B64A64_UINT + }, + }, }; @@ -244,7 +265,7 @@ st_pipe_vertex_format(const struct gl_array_attributes *attrib) const bool normalized = attrib->Normalized; const bool integer = attrib->Integer; GLenum16 type = attrib->Type; - unsigned index; + unsigned index 
= integer*2 + normalized; assert(size >= 1 && size <= 4); assert(format == GL_RGBA || format == GL_BGRA); @@ -298,11 +319,14 @@ st_pipe_vertex_format(const struct gl_array_attributes *attrib) return PIPE_FORMAT_B8G8R8A8_UNORM; } break; + case GL_UNSIGNED_INT
Re: [Mesa-dev] [PATCH 1/4] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars
On Wed, Apr 4, 2018 at 2:23 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > I have a very strong feeling that this isn't the only place where > reading/writing IMAGE and SAMPLER variables is going to cause NIR heartburn. > For example, we have special cases in nir_validate for SUBROUTINE variables > and we probably need IMAGE and SAMPLER support everywhere we have SUBROUTINE > plus some (since you can write to them now as well). > yeah. I was just making piglit happy here. I guess I will try to run it with some games using bindless_textures and fix all the crashes I encounter there at least. More piglit tests might be useful as well. Sadly I don't see any bindless_textures tests in the CTS :( > > On Tue, Apr 3, 2018 at 6:21 AM, Karol Herbst <kher...@redhat.com> wrote: >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> --- >> src/compiler/nir/nir_split_var_copies.c | 4 >> 1 file changed, 4 insertions(+) >> >> diff --git a/src/compiler/nir/nir_split_var_copies.c >> b/src/compiler/nir/nir_split_var_copies.c >> index bc3ceedbdb8..231a89add4d 100644 >> --- a/src/compiler/nir/nir_split_var_copies.c >> +++ b/src/compiler/nir/nir_split_var_copies.c >> @@ -241,6 +241,10 @@ split_var_copies_block(nir_block *block, struct >> split_var_copies_state *state) >> ralloc_steal(state->dead_ctx, instr); >> } >> break; >> + /* for bindless those are uint64 */ >> + case GLSL_TYPE_IMAGE: >> + case GLSL_TYPE_SAMPLER: >> + assert(src_head->var->data.bindless); >>case GLSL_TYPE_INT: >>case GLSL_TYPE_UINT: >>case GLSL_TYPE_INT16: >> -- >> 2.14.3 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/4] nir: add support for bindless_texture samplers
On Wed, Apr 4, 2018 at 2:16 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > On Tue, Apr 3, 2018 at 6:21 AM, Karol Herbst <kher...@redhat.com> wrote: >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> --- >> src/compiler/glsl/glsl_to_nir.cpp | 17 +++-- >> src/compiler/nir/nir.h| 1 + >> src/compiler/nir/nir_print.c | 3 +++ >> 3 files changed, 19 insertions(+), 2 deletions(-) >> >> diff --git a/src/compiler/glsl/glsl_to_nir.cpp >> b/src/compiler/glsl/glsl_to_nir.cpp >> index dbb58d82e8f..8e2d96a2361 100644 >> --- a/src/compiler/glsl/glsl_to_nir.cpp >> +++ b/src/compiler/glsl/glsl_to_nir.cpp >> @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir) >> { >> unsigned num_srcs; >> nir_texop op; >> + bool bindless = >> ir->sampler->variable_referenced()->contains_bindless(); >> + >> switch (ir->op) { >> case ir_tex: >>op = nir_texop_tex; >> @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir) >>num_srcs++; >> if (ir->offset != NULL) >>num_srcs++; >> + if (bindless) >> + num_srcs=+; >> >> nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); >> >> @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir) >>unreachable("not reached"); >> } >> >> - instr->texture = evaluate_deref(>instr, ir->sampler); >> - >> unsigned src_number = 0; >> >> + /* for bindless we use the handle src */ >> + if (bindless) { >> + instr->texture = NULL; >> + instr->src[src_number].src = >> + nir_src_for_ssa(evaluate_rvalue(ir->sampler)); >> + instr->src[src_number].src_type = nir_tex_src_handle; >> + src_number++; >> + } else { >> + instr->texture = evaluate_deref(>instr, ir->sampler); >> + } >> + >> if (ir->coordinate != NULL) { >>instr->coord_components = ir->coordinate->type->vector_elements; >>instr->src[src_number].src = >> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h >> index f33049d7134..e4d626d263e 100644 >> --- a/src/compiler/nir/nir.h >> +++ b/src/compiler/nir/nir.h >> @@ -1218,6 +1218,7 @@ typedef enum { >> nir_tex_src_texture_offset, /* < 
dynamically uniform indirect offset >> */ >> nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset >> */ >> nir_tex_src_plane, /* < selects plane for planar textures */ >> + nir_tex_src_handle, /* < handle for bindless samples */ > > > Do we want to have separate texture and sampler handles? We don't care for > GL but I kind-of think we will for Vulkan. > Don't know. Never looked into vulkan yet. I could rename it to sample_handle for now and we can add the texture handle later for vulkan? >> >> nir_num_tex_src_types >> } nir_tex_src_type; >> >> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c >> index 21f13097651..c9431555f2f 100644 >> --- a/src/compiler/nir/nir_print.c >> +++ b/src/compiler/nir/nir_print.c >> @@ -778,6 +778,9 @@ print_tex_instr(nir_tex_instr *instr, print_state >> *state) >>case nir_tex_src_plane: >> fprintf(fp, "(plane)"); >> break; >> + case nir_tex_src_handle: >> + fprintf(fp, "(handle)"); >> + break; >> >>default: >> unreachable("Invalid texture source type"); >> -- >> 2.14.3 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/4] nir: add support for bindless_texture samplers
On Tue, Apr 3, 2018 at 3:21 PM, Karol Herbst <kher...@redhat.com> wrote: > Signed-off-by: Karol Herbst <kher...@redhat.com> > --- > src/compiler/glsl/glsl_to_nir.cpp | 17 +++-- > src/compiler/nir/nir.h| 1 + > src/compiler/nir/nir_print.c | 3 +++ > 3 files changed, 19 insertions(+), 2 deletions(-) > > diff --git a/src/compiler/glsl/glsl_to_nir.cpp > b/src/compiler/glsl/glsl_to_nir.cpp > index dbb58d82e8f..8e2d96a2361 100644 > --- a/src/compiler/glsl/glsl_to_nir.cpp > +++ b/src/compiler/glsl/glsl_to_nir.cpp > @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir) > { > unsigned num_srcs; > nir_texop op; > + bool bindless = ir->sampler->variable_referenced()->contains_bindless(); > + > switch (ir->op) { > case ir_tex: >op = nir_texop_tex; > @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir) >num_srcs++; > if (ir->offset != NULL) >num_srcs++; > + if (bindless) > + num_srcs=+; small typo here, should have been "num_srcs++" instead. > > nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); > > @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir) >unreachable("not reached"); > } > > - instr->texture = evaluate_deref(>instr, ir->sampler); > - > unsigned src_number = 0; > > + /* for bindless we use the handle src */ > + if (bindless) { > + instr->texture = NULL; > + instr->src[src_number].src = > + nir_src_for_ssa(evaluate_rvalue(ir->sampler)); > + instr->src[src_number].src_type = nir_tex_src_handle; > + src_number++; > + } else { > + instr->texture = evaluate_deref(>instr, ir->sampler); > + } > + > if (ir->coordinate != NULL) { >instr->coord_components = ir->coordinate->type->vector_elements; >instr->src[src_number].src = > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index f33049d7134..e4d626d263e 100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -1218,6 +1218,7 @@ typedef enum { > nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */ > nir_tex_src_sampler_offset, /* < dynamically 
uniform indirect offset */ > nir_tex_src_plane, /* < selects plane for planar textures */ > + nir_tex_src_handle, /* < handle for bindless samples */ > nir_num_tex_src_types > } nir_tex_src_type; > > diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c > index 21f13097651..c9431555f2f 100644 > --- a/src/compiler/nir/nir_print.c > +++ b/src/compiler/nir/nir_print.c > @@ -778,6 +778,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) >case nir_tex_src_plane: > fprintf(fp, "(plane)"); > break; > + case nir_tex_src_handle: > + fprintf(fp, "(handle)"); > + break; > >default: > unreachable("Invalid texture source type"); > -- > 2.14.3 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars
Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/nir/nir_split_var_copies.c | 4 1 file changed, 4 insertions(+) diff --git a/src/compiler/nir/nir_split_var_copies.c b/src/compiler/nir/nir_split_var_copies.c index bc3ceedbdb8..231a89add4d 100644 --- a/src/compiler/nir/nir_split_var_copies.c +++ b/src/compiler/nir/nir_split_var_copies.c @@ -241,6 +241,10 @@ split_var_copies_block(nir_block *block, struct split_var_copies_state *state) ralloc_steal(state->dead_ctx, instr); } break; + /* for bindless those are uint64 */ + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_SAMPLER: + assert(src_head->var->data.bindless); case GLSL_TYPE_INT: case GLSL_TYPE_UINT: case GLSL_TYPE_INT16: -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/4] glsl/nir: fix variable type for image intrinsics and ubos
If the bindless image is passed through a struct we ended up getting the glsl_type of the struct, not the image. variable_referenced points to the declaration of the struct, so it won't work for bindless images. So just drop it. Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/glsl/glsl_to_nir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 8e2d96a2361..1fc0cac4736 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -893,7 +893,7 @@ nir_visitor::visit(ir_call *ir) exec_node *param = ir->actual_parameters.get_head(); ir_dereference *image = (ir_dereference *)param; const glsl_type *type = -image->variable_referenced()->type->without_array(); +image->type->without_array(); instr->variables[0] = evaluate_deref(>instr, image); param = param->get_next(); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/4] nir: add support for bindless_texture
I think most of the changes are straightforward. The changes needed for images should be discussed, because in its current form it would require changing all drivers using nir and supporting images. Karol Herbst (4): nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars nir: add support for bindless_texture samplers glsl/nir: fix variable type for image intrinsics and ubos RFC nir: add support for bindless_texture images src/compiler/glsl/glsl_to_nir.cpp | 38 - src/compiler/nir/nir.h | 3 ++- src/compiler/nir/nir_intrinsics.py | 24 ++--- src/compiler/nir/nir_print.c| 3 +++ src/compiler/nir/nir_split_var_copies.c | 4 5 files changed, 54 insertions(+), 18 deletions(-) -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/4] RFC nir: add support for bindless_texture images
I added another source for all image_var_* intrinsics. Drivers have to be adjusted with this change. There was some discussion to add new intrinsics to handle operations on bindless images. Maybe we can continue with this here? Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/glsl/glsl_to_nir.cpp | 19 +-- src/compiler/nir/nir.h | 2 +- src/compiler/nir/nir_intrinsics.py | 24 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 1fc0cac4736..4e053c140c2 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -894,10 +894,14 @@ nir_visitor::visit(ir_call *ir) ir_dereference *image = (ir_dereference *)param; const glsl_type *type = image->type->without_array(); + bool bindless = image->variable_referenced()->contains_bindless(); instr->variables[0] = evaluate_deref(>instr, image); param = param->get_next(); + if (bindless) +instr->variables[0]->var->data.bindless = true; + /* Set the intrinsic destination. */ if (ir->return_deref) { unsigned num_components = ir->return_deref->type->vector_elements; @@ -909,6 +913,11 @@ nir_visitor::visit(ir_call *ir) if (op == nir_intrinsic_image_var_size || op == nir_intrinsic_image_var_samples) { +if (bindless) { + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(image)); +} else { + instr->src[0] = nir_src_for_ssa(_undef->def); +} nir_builder_instr_insert(, >instr); break; } @@ -941,15 +950,21 @@ nir_visitor::visit(ir_call *ir) instr->src[1] = nir_src_for_ssa(_undef->def); } + if (bindless) { +instr->src[2] = nir_src_for_ssa(evaluate_rvalue(image)); + } else { +instr->src[2] = nir_src_for_ssa(_undef->def); + } + /* Set the intrinsic parameters. 
*/ if (!param->is_tail_sentinel()) { -instr->src[2] = +instr->src[3] = nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); param = param->get_next(); } if (!param->is_tail_sentinel()) { -instr->src[3] = +instr->src[4] = nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); param = param->get_next(); } diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index e4d626d263e..c6081cbb61f 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1108,7 +1108,7 @@ typedef enum { } nir_intrinsic_index_flag; -#define NIR_INTRINSIC_MAX_INPUTS 4 +#define NIR_INTRINSIC_MAX_INPUTS 5 typedef struct { const char *name; diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 1bc99552cd7..d6da63ab769 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -291,19 +291,19 @@ atomic3("atomic_counter_comp_swap") # argument with the value to be written, and image atomic operations take # either one or two additional scalar arguments with the same meaning as in # the ARB_shader_image_load_store specification. 
-intrinsic("image_var_load", src_comp=[4, 1], dest_comp=4, num_vars=1, +intrinsic("image_var_load", src_comp=[4, 1, 1], dest_comp=4, num_vars=1, flags=[CAN_ELIMINATE]) -intrinsic("image_var_store", src_comp=[4, 1, 4], num_vars=1) -intrinsic("image_var_atomic_add", src_comp=[4, 1, 1], dest_comp=1, num_vars=1) -intrinsic("image_var_atomic_min", src_comp=[4, 1, 1], dest_comp=1, num_vars=1) -intrinsic("image_var_atomic_max", src_comp=[4, 1, 1], dest_comp=1, num_vars=1) -intrinsic("image_var_atomic_and", src_comp=[4, 1, 1], dest_comp=1, num_vars=1) -intrinsic("image_var_atomic_or", src_comp=[4, 1, 1], dest_comp=1, num_vars=1) -intrinsic("image_var_atomic_xor", src_comp=[4, 1, 1], dest_comp=1, num_vars=1) -intrinsic("image_var_atomic_exchange", src_comp=[4, 1, 1], dest_comp=1, num_vars=1) -intrinsic("image_var_atomic_comp_swap", src_comp=[4, 1, 1, 1], dest_comp=1, num_vars=1) -intrinsic("image_var_size",dest_comp=0, num_vars=1, flags=[CAN_ELIMINATE, CAN_REORDER]) -intrinsic("image_var_samples", dest_comp=1, num_vars=1, flags=[CAN_ELIMINATE, CAN_REORDER]) +intrinsic("image_var_store", src_comp=[4, 1, 1, 4], num_vars=1) +intrinsic("image_var_atomic_add", src_comp=[4, 1, 1, 1], dest_comp=1, num_vars=1) +intrinsic("image_var_atomic_min", src_comp=[4, 1, 1, 1], dest_comp=1, num_vars
[Mesa-dev] [PATCH 2/4] nir: add support for bindless_texture samplers
Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/glsl/glsl_to_nir.cpp | 17 +++-- src/compiler/nir/nir.h| 1 + src/compiler/nir/nir_print.c | 3 +++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index dbb58d82e8f..8e2d96a2361 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir) { unsigned num_srcs; nir_texop op; + bool bindless = ir->sampler->variable_referenced()->contains_bindless(); + switch (ir->op) { case ir_tex: op = nir_texop_tex; @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir) num_srcs++; if (ir->offset != NULL) num_srcs++; + if (bindless) + num_srcs=+; nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir) unreachable("not reached"); } - instr->texture = evaluate_deref(>instr, ir->sampler); - unsigned src_number = 0; + /* for bindless we use the handle src */ + if (bindless) { + instr->texture = NULL; + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->sampler)); + instr->src[src_number].src_type = nir_tex_src_handle; + src_number++; + } else { + instr->texture = evaluate_deref(>instr, ir->sampler); + } + if (ir->coordinate != NULL) { instr->coord_components = ir->coordinate->type->vector_elements; instr->src[src_number].src = diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index f33049d7134..e4d626d263e 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1218,6 +1218,7 @@ typedef enum { nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */ nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ nir_tex_src_plane, /* < selects plane for planar textures */ + nir_tex_src_handle, /* < handle for bindless samples */ nir_num_tex_src_types } nir_tex_src_type; diff --git a/src/compiler/nir/nir_print.c 
b/src/compiler/nir/nir_print.c index 21f13097651..c9431555f2f 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -778,6 +778,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) case nir_tex_src_plane: fprintf(fp, "(plane)"); break; + case nir_tex_src_handle: + fprintf(fp, "(handle)"); + break; default: unreachable("Invalid texture source type"); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nir: add support for 4 constant offsets in tg4
On Fri, Mar 30, 2018 at 9:35 PM, Eric Anholt <e...@anholt.net> wrote: > Karol Herbst <kher...@redhat.com> writes: > >> Nvidia hardware can do that natively so there is no need to lower that to >> four >> TG4s instructions. >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> --- >> src/compiler/glsl/glsl_to_nir.cpp | 25 ++--- >> src/compiler/nir/nir.h| 9 - >> src/compiler/nir/nir_print.c | 9 + >> 3 files changed, 35 insertions(+), 8 deletions(-) >> >> diff --git a/src/compiler/glsl/glsl_to_nir.cpp >> b/src/compiler/glsl/glsl_to_nir.cpp >> index c4a6d52a5b2..4ea5f1616a7 100644 >> --- a/src/compiler/glsl/glsl_to_nir.cpp >> +++ b/src/compiler/glsl/glsl_to_nir.cpp >> @@ -2042,7 +2042,9 @@ nir_visitor::visit(ir_texture *ir) >>num_srcs++; >> if (ir->shadow_comparator != NULL) >>num_srcs++; >> - if (ir->offset != NULL) >> + if (ir->offset != NULL && ir->offset->type->is_array()) >> + num_srcs += ir->offset->type->array_size(); >> + else if (ir->offset != NULL) >>num_srcs++; >> >> nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); >> @@ -2097,12 +2099,21 @@ nir_visitor::visit(ir_texture *ir) >> >> if (ir->offset != NULL) { >>/* we don't support multiple offsets yet */ >> - assert(ir->offset->type->is_vector() || >> ir->offset->type->is_scalar()); >> - >> - instr->src[src_number].src = >> - nir_src_for_ssa(evaluate_rvalue(ir->offset)); >> - instr->src[src_number].src_type = nir_tex_src_offset; >> - src_number++; >> + if (ir->offset->type->is_vector() || ir->offset->type->is_scalar()) { >> + instr->src[src_number].src = >> +nir_src_for_ssa(evaluate_rvalue(ir->offset)); >> + instr->src[src_number].src_type = nir_tex_src_offset; >> + src_number++; >> + } else if (ir->offset->type->is_array()) { >> + for (int i = 0; i < ir->offset->type->array_size(); i++) { >> +instr->src[src_number].src = >> + >> nir_src_for_ssa(evaluate_rvalue(ir->offset->as_constant()->get_array_element(i)->as_rvalue())); >> +instr->src[src_number].src_type = >> 
(nir_tex_src_type)(nir_tex_src_offset + i); >> +src_number++; >> + } >> + } else { >> + assert(false); > > Maybe just do assert(ir->offset->type->is_array()) in the previous block > instead of the extra else. And optionally pull > ir->offset->as_constant() out to a temporary for nicer column wrapping. > Other than that, this seems good. > well the thing is, it only works with constants within the array. If you have non constant values the code wouldn't assert on that. But I will try to think about something nice there. > Reviewed-by: Eric Anholt <e...@anholt.net> > > If I'm reading my specs right, I'll be able to use this on vc6, too. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nir: add support for 4 constant offsets in tg4
Nvidia hardware can do that natively so there is no need to lower that to four TG4s instructions. Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/glsl/glsl_to_nir.cpp | 25 ++--- src/compiler/nir/nir.h| 9 - src/compiler/nir/nir_print.c | 9 + 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index c4a6d52a5b2..4ea5f1616a7 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -2042,7 +2042,9 @@ nir_visitor::visit(ir_texture *ir) num_srcs++; if (ir->shadow_comparator != NULL) num_srcs++; - if (ir->offset != NULL) + if (ir->offset != NULL && ir->offset->type->is_array()) + num_srcs += ir->offset->type->array_size(); + else if (ir->offset != NULL) num_srcs++; nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); @@ -2097,12 +2099,21 @@ nir_visitor::visit(ir_texture *ir) if (ir->offset != NULL) { /* we don't support multiple offsets yet */ - assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar()); - - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->offset)); - instr->src[src_number].src_type = nir_tex_src_offset; - src_number++; + if (ir->offset->type->is_vector() || ir->offset->type->is_scalar()) { + instr->src[src_number].src = +nir_src_for_ssa(evaluate_rvalue(ir->offset)); + instr->src[src_number].src_type = nir_tex_src_offset; + src_number++; + } else if (ir->offset->type->is_array()) { + for (int i = 0; i < ir->offset->type->array_size(); i++) { +instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->offset->as_constant()->get_array_element(i)->as_rvalue())); +instr->src[src_number].src_type = (nir_tex_src_type)(nir_tex_src_offset + i); +src_number++; + } + } else { + assert(false); + } } switch (ir->op) { diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 9fff1f4647d..7b02c4af05f 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1175,6 
+1175,9 @@ typedef enum { nir_tex_src_projector, nir_tex_src_comparator, /* shadow comparator */ nir_tex_src_offset, + nir_tex_src_offset1, + nir_tex_src_offset2, + nir_tex_src_offset3, nir_tex_src_bias, nir_tex_src_lod, nir_tex_src_ms_index, /* MSAA sample index */ @@ -1377,6 +1380,9 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src) return nir_type_float; case nir_tex_src_offset: + case nir_tex_src_offset1: + case nir_tex_src_offset2: + case nir_tex_src_offset3: case nir_tex_src_ms_index: case nir_tex_src_texture_offset: case nir_tex_src_sampler_offset: @@ -1408,7 +1414,8 @@ nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src) /* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for * the offset, since a cube maps to a single face. */ - if (instr->src[src].src_type == nir_tex_src_offset) { + if (instr->src[src].src_type >= nir_tex_src_offset && + instr->src[src].src_type <= nir_tex_src_offset3) { if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) return 2; else if (instr->is_array) diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 21f13097651..e13a4f9aa6d 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -751,6 +751,15 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) case nir_tex_src_offset: fprintf(fp, "(offset)"); break; + case nir_tex_src_offset1: + fprintf(fp, "(offset1)"); + break; + case nir_tex_src_offset2: + fprintf(fp, "(offset2)"); + break; + case nir_tex_src_offset3: + fprintf(fp, "(offset3)"); + break; case nir_tex_src_bias: fprintf(fp, "(bias)"); break; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nvc0: drop image binding from BGR10A2 format
Did a CTS run on that. Things are looking better with it. No regressions. Tested-By: Karol Herbst <kher...@redhat.com> On Thu, Mar 29, 2018 at 5:47 AM, Ilia Mirkin <imir...@alum.mit.edu> wrote: > Fixes a bunch of new CTS pbo tests that use that as an output format, > which the state tracker converts into buffer image writes. > > No part of the driver is ready for BGR10A2. It could probably be enabled > on Maxwell+, but seems unnecessary. > > Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> > --- > src/gallium/drivers/nouveau/nv50/nv50_formats.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c > b/src/gallium/drivers/nouveau/nv50/nv50_formats.c > index 0ead8ac2e1e..9f8faf768dd 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c > @@ -154,7 +154,7 @@ const struct nv50_format > nv50_format_table[PIPE_FORMAT_COUNT] = > > C4(A, R10G10B10A2_UNORM, RGB10_A2_UNORM, R, G, B, A, UNORM, A2B10G10R10, > TD), > F3(A, R10G10B10X2_UNORM, RGB10_A2_UNORM, R, G, B, xx, UNORM, A2B10G10R10, > T), > - C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, > IB), > + C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, > TB), > F3(A, B10G10R10X2_UNORM, BGR10_A2_UNORM, B, G, R, xx, UNORM, A2B10G10R10, > T), > C4(A, R10G10B10A2_SNORM, NONE, R, G, B, A, SNORM, A2B10G10R10, T), > C4(A, B10G10R10A2_SNORM, NONE, B, G, R, A, SNORM, A2B10G10R10, T), > -- > 2.16.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nvc0/ir: fix INTERP_* with indirect inputs
Reviewed-by: Karol Herbst <kher...@redhat.com> On Sat, Mar 24, 2018 at 8:19 PM, Ilia Mirkin <imir...@alum.mit.edu> wrote: > There were two problems, both of which are fixed now: > - The indirect address was not being shifted by 4 > - The indirect address was being placed as an argument in the offset case > > This fixes some of the new interpolateAt* piglits which now test for > these situations. > > Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> > --- > src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 7 --- > 1 file changed, 4 insertions(+), 3 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > index 09b5228127a..3c5bad05fe7 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > @@ -3083,10 +3083,11 @@ Converter::handleINTERP(Value *dst[4]) > assert(sym[c]); > op = insn->op; > mode = insn->ipa; > + ptr = insn->getIndirect(0, 0); >} > } else { >if (src.isIndirect(0)) > - ptr = fetchSrc(src.getIndirect(0), 0, NULL); > + ptr = shiftAddress(fetchSrc(src.getIndirect(0), 0, NULL)); > >// We can assume that the fixed index will point to an input of the > same >// interpolation type in case of an indirect. > @@ -3144,10 +3145,10 @@ Converter::handleINTERP(Value *dst[4]) >insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c)); >if (op == OP_PINTERP) > insn->setSrc(1, w); > - if (ptr) > - insn->setIndirect(0, 0, ptr); >if (offset) > insn->setSrc(op == OP_PINTERP ? 2 : 1, offset); > + if (ptr) > + insn->setIndirect(0, 0, ptr); > >insn->setInterpolate(mode); > } > -- > 2.16.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nv50/ir: add more advanced slct constant folding code
From: Karol Herbst <karolher...@gmail.com> shader-db changes: total instructions in shared programs : 5894114 -> 5887031 (-0.12%) total gprs used in shared programs: 666558 -> 666514 (-0.01%) total shared used in shared programs : 520416 -> 520416 (0.00%) total local used in shared programs : 53524 -> 53572 (0.09%) total bytes used in shared programs : 54006744 -> 53942072 (-0.12%) local sharedgpr inst bytes helped 3 0 36 936 936 hurt 10 0 5 0 0 increase in local use is related to a bug in the spilling code Signed-off-by: Karol Herbst <karolher...@gmail.com> --- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 32 +++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 48cf74950df..18d5456b8fd 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -636,11 +636,35 @@ ConstantFolding::expr(Instruction *i, return; } break; - case OP_SLCT: - if (a->data.u32 != b->data.u32) + case OP_SLCT: { + CmpInstruction *slct = i->asCmp(); + // slct(a, a, c) -> a + if (a->data.u32 == b->data.u32) { + res.data.u32 = a->data.u32; + break; + } + // slct(-1, 0, c) -> set(c, 0) + if (a->data.u32 == 0x && + b->data.u32 == 0x0) { + i->op = OP_SET; + i->setSrc(0, i->getSrc(2)); + i->setSrc(2, NULL); + i->dType = TYPE_U32; return; - res.data.u32 = a->data.u32; - break; + } + // slct(0, -1, c) -> !set(c, 0) + if (a->data.u32 == 0x0 && + b->data.u32 == 0x) { + i->op = OP_SET; + i->swapSources(0, 1); + i->setSrc(0, i->getSrc(2)); + i->setSrc(2, NULL); + i->dType = TYPE_U32; + slct->setCondition(inverseCondCode(slct->getCondition())); + return; + } + return; + } case OP_EXTBF: { int offset = b->data.u32 & 0xff; int width = (b->data.u32 >> 8) & 0xff; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nv50/ir: optimise slct(t, f, set) to mov(set) or not(set)
just noticed I sent out the wrong version of that patch... On Tue, Mar 27, 2018 at 10:50 PM, Karol Herbst <kher...@redhat.com> wrote: > From: Karol Herbst <karolher...@gmail.com> > > helps mainly Feral-ported games > > changes in shader-db: > total instructions in shared programs : 3940749 -> 3935015 (-0.15%) > total gprs used in shared programs: 481460 -> 481433 (-0.01%) > total local used in shared programs : 27481 -> 27513 (0.12%) > total bytes used in shared programs : 36115776 -> 36063344 (-0.15%) > > localgpr inst bytes > helped 6 31 854 854 > hurt 10 5 1 1 > --- > .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 26 > -- > 1 file changed, 24 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 48cf74950df..1e3dea95494 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -637,9 +637,31 @@ ConstantFolding::expr(Instruction *i, >} >break; > case OP_SLCT: > - if (a->data.u32 != b->data.u32) > + // slct(a, a, b) -> a > + if (a->data.u32 == b->data.u32) { > + res.data.u32 = a->data.u32; > + } else { > + // slct_ne(true, false, bool) -> !bool > + CmpInstruction *slct = i->asCmp(); > + Instruction *set = i->getSrc(2)->getInsn(); > + if (!set || set->op != OP_SET) > +return; > + if (isFloatType(set->dType)) > +return; > + if ((slct->getCondition() == CC_NE && imm0.isInteger(-1) && > imm1.isInteger(0)) || > + (slct->getCondition() == CC_EQ && imm0.isInteger(0) && > imm1.isInteger(-1))) { > +bld.setPosition(i, false); > +bld.mkOp1(OP_MOV, i->dType, i->getDef(0), i->getSrc(2)); > +delete_Instruction(prog, i); > + } else if ( > + (slct->getCondition() == CC_EQ && imm0.isInteger(-1) && > imm1.isInteger(0)) || > + (slct->getCondition() == CC_NE && imm0.isInteger(0) && > imm1.isInteger(-1))) { > +bld.setPosition(i, false); > +bld.mkOp1(OP_NOT, i->dType, i->getDef(0), 
i->getSrc(2)); > +delete_Instruction(prog, i); > + } > return; > - res.data.u32 = a->data.u32; > + } >break; > case OP_EXTBF: { >int offset = b->data.u32 & 0xff; > -- > 2.14.3 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nv50/ir: optimise slct(t, f, set) to mov(set) or not(set)
On Tue, Mar 27, 2018 at 11:04 PM, Ilia Mirkin <imir...@alum.mit.edu> wrote: > On Tue, Mar 27, 2018 at 4:50 PM, Karol Herbst <kher...@redhat.com> wrote: >> From: Karol Herbst <karolher...@gmail.com> >> >> helps mainly Feral-ported games >> >> changes in shader-db: >> total instructions in shared programs : 3940749 -> 3935015 (-0.15%) >> total gprs used in shared programs: 481460 -> 481433 (-0.01%) >> total local used in shared programs : 27481 -> 27513 (0.12%) >> total bytes used in shared programs : 36115776 -> 36063344 (-0.15%) >> >> localgpr inst bytes >> helped 6 31 854 854 >> hurt 10 5 1 1 > > Can you look at the local memory regressions and see what happened? > Seems like local went up a lot. > well yeah, it only happened in shaders which were already spilling and we ended up with spilling silliness again where a spilled value was loaded from lmem although we could have used the registers in that BB. >> --- >> .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 26 >> -- >> 1 file changed, 24 insertions(+), 2 deletions(-) >> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> index 48cf74950df..1e3dea95494 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> @@ -637,9 +637,31 @@ ConstantFolding::expr(Instruction *i, >>} >>break; >> case OP_SLCT: >> - if (a->data.u32 != b->data.u32) >> + // slct(a, a, b) -> a >> + if (a->data.u32 == b->data.u32) { >> + res.data.u32 = a->data.u32; >> + } else { >> + // slct_ne(true, false, bool) -> !bool >> + CmpInstruction *slct = i->asCmp(); >> + Instruction *set = i->getSrc(2)->getInsn(); >> + if (!set || set->op != OP_SET) >> +return; >> + if (isFloatType(set->dType)) >> +return; >> + if ((slct->getCondition() == CC_NE && imm0.isInteger(-1) && >> imm1.isInteger(0)) || >> + (slct->getCondition() == CC_EQ && imm0.isInteger(0) && >> imm1.isInteger(-1))) { 
>> +bld.setPosition(i, false); >> +bld.mkOp1(OP_MOV, i->dType, i->getDef(0), i->getSrc(2)); >> +delete_Instruction(prog, i); >> + } else if ( >> + (slct->getCondition() == CC_EQ && imm0.isInteger(-1) && >> imm1.isInteger(0)) || >> + (slct->getCondition() == CC_NE && imm0.isInteger(0) && >> imm1.isInteger(-1))) { >> +bld.setPosition(i, false); >> +bld.mkOp1(OP_NOT, i->dType, i->getDef(0), i->getSrc(2)); > > dType should always be U32 for MOV and NOT. > >> +delete_Instruction(prog, i); >> + } > > Don't forget to indicate that you made progress (if you did). > >> return; >> - res.data.u32 = a->data.u32; >> + } >>break; >> case OP_EXTBF: { >>int offset = b->data.u32 & 0xff; >> -- >> 2.14.3 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nv50/ir: optimise slct(t, f, set) to mov(set) or not(set)
On Tue, Mar 27, 2018 at 11:19 PM, Ian Romanick <i...@freedesktop.org> wrote: > It will be interesting to see if this still occurs after nouveau > finishes switching to NIR. There's a pattern in nir_opt_algebraic for this. > well there is no plan to switch to NIR for everything where we can use TGSI. > On 03/27/2018 01:50 PM, Karol Herbst wrote: >> From: Karol Herbst <karolher...@gmail.com> >> >> helps mainly Feral-ported games >> >> changes in shader-db: >> total instructions in shared programs : 3940749 -> 3935015 (-0.15%) >> total gprs used in shared programs: 481460 -> 481433 (-0.01%) >> total local used in shared programs : 27481 -> 27513 (0.12%) >> total bytes used in shared programs : 36115776 -> 36063344 (-0.15%) >> >> localgpr inst bytes >> helped 6 31 854 854 >> hurt 10 5 1 1 >> --- >> .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 26 >> -- >> 1 file changed, 24 insertions(+), 2 deletions(-) >> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> index 48cf74950df..1e3dea95494 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> @@ -637,9 +637,31 @@ ConstantFolding::expr(Instruction *i, >>} >>break; >> case OP_SLCT: >> - if (a->data.u32 != b->data.u32) >> + // slct(a, a, b) -> a >> + if (a->data.u32 == b->data.u32) { >> + res.data.u32 = a->data.u32; >> + } else { >> + // slct_ne(true, false, bool) -> !bool >> + CmpInstruction *slct = i->asCmp(); >> + Instruction *set = i->getSrc(2)->getInsn(); >> + if (!set || set->op != OP_SET) >> +return; >> + if (isFloatType(set->dType)) >> +return; >> + if ((slct->getCondition() == CC_NE && imm0.isInteger(-1) && >> imm1.isInteger(0)) || >> + (slct->getCondition() == CC_EQ && imm0.isInteger(0) && >> imm1.isInteger(-1))) { >> +bld.setPosition(i, false); >> +bld.mkOp1(OP_MOV, i->dType, i->getDef(0), i->getSrc(2)); >> +delete_Instruction(prog, i); 
>> + } else if ( >> + (slct->getCondition() == CC_EQ && imm0.isInteger(-1) && >> imm1.isInteger(0)) || >> + (slct->getCondition() == CC_NE && imm0.isInteger(0) && >> imm1.isInteger(-1))) { >> +bld.setPosition(i, false); >> +bld.mkOp1(OP_NOT, i->dType, i->getDef(0), i->getSrc(2)); >> +delete_Instruction(prog, i); >> + } >> return; >> - res.data.u32 = a->data.u32; >> + } >>break; >> case OP_EXTBF: { >>int offset = b->data.u32 & 0xff; >> > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nv50/ir: optimise slct(t, f, set) to mov(set) or not(set)
From: Karol Herbst <karolher...@gmail.com> helps mainly Feral-ported games changes in shader-db: total instructions in shared programs : 3940749 -> 3935015 (-0.15%) total gprs used in shared programs: 481460 -> 481433 (-0.01%) total local used in shared programs : 27481 -> 27513 (0.12%) total bytes used in shared programs : 36115776 -> 36063344 (-0.15%) localgpr inst bytes helped 6 31 854 854 hurt 10 5 1 1 --- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 26 -- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 48cf74950df..1e3dea95494 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -637,9 +637,31 @@ ConstantFolding::expr(Instruction *i, } break; case OP_SLCT: - if (a->data.u32 != b->data.u32) + // slct(a, a, b) -> a + if (a->data.u32 == b->data.u32) { + res.data.u32 = a->data.u32; + } else { + // slct_ne(true, false, bool) -> !bool + CmpInstruction *slct = i->asCmp(); + Instruction *set = i->getSrc(2)->getInsn(); + if (!set || set->op != OP_SET) +return; + if (isFloatType(set->dType)) +return; + if ((slct->getCondition() == CC_NE && imm0.isInteger(-1) && imm1.isInteger(0)) || + (slct->getCondition() == CC_EQ && imm0.isInteger(0) && imm1.isInteger(-1))) { +bld.setPosition(i, false); +bld.mkOp1(OP_MOV, i->dType, i->getDef(0), i->getSrc(2)); +delete_Instruction(prog, i); + } else if ( + (slct->getCondition() == CC_EQ && imm0.isInteger(-1) && imm1.isInteger(0)) || + (slct->getCondition() == CC_NE && imm0.isInteger(0) && imm1.isInteger(-1))) { +bld.setPosition(i, false); +bld.mkOp1(OP_NOT, i->dType, i->getDef(0), i->getSrc(2)); +delete_Instruction(prog, i); + } return; - res.data.u32 = a->data.u32; + } break; case OP_EXTBF: { int offset = b->data.u32 & 0xff; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nv50/ra: prefer def == src2 for mad/sad with immediates on nvc0
From: Karol Herbst <karolher...@gmail.com> This helps with the PostRALoadPropagation pass moving long immediates into FMA/MAD instructions. changes in shader-db: total instructions in shared programs : 5894114 -> 5886074 (-0.14%) total gprs used in shared programs: 666558 -> 666563 (0.00%) total shared used in shared programs : 520416 -> 520416 (0.00%) total local used in shared programs : 53524 -> 53524 (0.00%) total bytes used in shared programs : 54006744 -> 53932472 (-0.14%) local sharedgpr inst bytes helped 0 0 241924192 hurt 0 0 7 9 9 Signed-off-by: Karol Herbst <karolher...@gmail.com> --- src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 30 ++ 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 3a0e56e1385..aeaf1ebe8f0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -1466,17 +1466,27 @@ GCRA::allocateRegisters(ArrayList& insns) nodes[i].init(regs, lval); RIG.insert([i]); - if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL && - prog->getTarget()->getChipset() < 0xc0) { + if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL) { Instruction *insn = lval->getInsn(); -if (insn->op == OP_MAD || insn->op == OP_FMA || insn->op == OP_SAD) - // Short encoding only possible if they're all GPRs, no need to - // affect them otherwise. - if (insn->flagsDef < 0 && - insn->src(0).getFile() == FILE_GPR && - insn->src(1).getFile() == FILE_GPR && - insn->src(2).getFile() == FILE_GPR) - nodes[i].addRegPreference(getNode(insn->getSrc(2)->asLValue())); +if (insn->op != OP_MAD && insn->op != OP_FMA && insn->op != OP_SAD) + continue; +// Short encoding or load propagate immediates only possible if +// they're all GPRs, no need to affect them otherwise. 
+if (insn->flagsDef >= 0 || +insn->src(0).getFile() != FILE_GPR || +insn->src(1).getFile() != FILE_GPR || +insn->src(2).getFile() != FILE_GPR) + continue; +// for nvc0+ we can loadpropagate limms only if we have +// dest == src2 reg id. Using getImmediate here is fine because +// we only set a reg preference and leave the immediate alone. +ImmediateValue imm; +if (prog->getTarget()->getChipset() >= 0xc0 && +!insn->src(0).getImmediate(imm) && +!insn->src(1).getImmediate(imm)) + continue; + +nodes[i].addRegPreference(getNode(insn->getSrc(2)->asLValue())); } } } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nvc0/ir: fix emitting NOTs with predicates
From: Karol Herbst <karolher...@gmail.com> Signed-off-by: Karol Herbst <karolher...@gmail.com> --- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index e2c41a0e264..2f7dbd9519f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -853,6 +853,8 @@ void CodeEmitterNVC0::emitNOT(Instruction *i) { assert(i->encSize == 8); + if (i->getPredicate()) + i->moveSources(1, 1); i->setSrc(1, i->src(0)); emitForm_A(i, HEX64(6800, 01c3)); } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v5 18/21] clover: Handle CL_PROGRAM_IL in clGetProgramInfo
Reviewed-by: Karol Herbst <kher...@redhat.com> On Sun, Mar 25, 2018 at 8:02 PM, Pierre Moreau <pierre.mor...@free.fr> wrote: > Signed-off-by: Pierre Moreau <pierre.mor...@free.fr> > --- > src/gallium/state_trackers/clover/api/program.cpp | 7 +++ > 1 file changed, 7 insertions(+) > > diff --git a/src/gallium/state_trackers/clover/api/program.cpp > b/src/gallium/state_trackers/clover/api/program.cpp > index 3c7e56efb9..851a212b99 100644 > --- a/src/gallium/state_trackers/clover/api/program.cpp > +++ b/src/gallium/state_trackers/clover/api/program.cpp > @@ -429,6 +429,13 @@ clGetProgramInfo(cl_program d_prog, cl_program_info > param, >buf.as_string() = prog.source(); >break; > > + case CL_PROGRAM_IL: > + if (prog.has_il) > + buf.as_vector() = prog.il(); > + else if (r_size) > + *r_size = 0u; > + break; > + > case CL_PROGRAM_BINARY_SIZES: >buf.as_vector() = map([&](const device ) { > return prog.build(dev).binary.size(); > -- > 2.16.3 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 06/19] RFC: nir/vtn: "raw" pointer support
On Sun, Mar 25, 2018 at 2:18 PM, Rob Clark <robdcl...@gmail.com> wrote: > On Sun, Mar 25, 2018 at 6:35 AM, Karol Herbst <kher...@redhat.com> wrote: >> On Sun, Mar 25, 2018 at 12:18 AM, Rob Clark <robdcl...@gmail.com> wrote: >>> On Fri, Mar 23, 2018 at 5:18 PM, Jason Ekstrand <ja...@jlekstrand.net> >>> wrote: >>>> On Fri, Mar 23, 2018 at 2:15 PM, Karol Herbst <kher...@redhat.com> wrote: >>>>> >>>>> On Fri, Mar 23, 2018 at 10:07 PM, Jason Ekstrand <ja...@jlekstrand.net> >>>>> wrote: >>>>> > +list >>>>> > >>>>> > On Fri, Mar 23, 2018 at 1:45 PM, Karol Herbst <kher...@redhat.com> >>>>> > wrote: >>>>> >> >>>>> >> On Fri, Mar 23, 2018 at 9:30 PM, Jason Ekstrand <ja...@jlekstrand.net> >>>>> >> wrote: >>>>> >> > As I've been rewriting core NIR deref handling, I've been thinking >>>>> >> > about >>>>> >> > this problem quite a bit. One objective I have is to actually make >>>>> >> > UBO >>>>> >> > and >>>>> >> > SSBO access go through derefs instead of just being an offset and >>>>> >> > index >>>>> >> > so >>>>> >> > that the compiler can better reason about them. In particular, I >>>>> >> > want >>>>> >> > to be >>>>> >> > able to start doing load/store elimination on SSBOs, SLM, and >>>>> >> > whatever >>>>> >> > CL >>>>> >> > has which would be great for everyone's compute performance (GL, >>>>> >> > Vulkan, >>>>> >> > CL, >>>>> >> > etc.). >>>>> >> > >>>>> >> > I would be lying if I said I had a full plan but I do have part of a >>>>> >> > plan. >>>>> >> > In my patch which adds the deref instructions, I add a new "cast" >>>>> >> > deref >>>>> >> > type >>>>> >> > which takes an arbitrary value as it's source and kicks out a deref >>>>> >> > with >>>>> >> > a >>>>> >> > type. Whenever we discover that the source of the cast is actually >>>>> >> > another >>>>> >> > deref which is compatible (same type etc.), copy propagation gets rid >>>>> >> > of >>>>> >> > the >>>>> >> > cast for you. 
The idea is that, instead of doing a >>>>> >> > load_raw(raw_ptr), >>>>> >> > you >>>>> >> > would do a load((type *)raw_ptr). >>>>> >> > >>>>> >> > Right now, most of the core NIR optimizations will throw a fit if >>>>> >> > they >>>>> >> > ever >>>>> >> > see a cast. This is intentional because it requires us to manually >>>>> >> > go >>>>> >> > through and handle casts. This would mean that, at the moment, you >>>>> >> > would >>>>> >> > have to lower to load_raw intrinsics almost immediately after coming >>>>> >> > out >>>>> >> > of >>>>> >> > SPIR-V. >>>>> >> > >>>>> >> >>>>> >> Well it gets more fun with OpenCL 2.0 where you can have generic >>>>> >> pointer where you only know the type at creation type. You can also >>>>> >> declare generic pointers as function inputs in a way, that you never >>>>> >> actually know from where you have to load if you only have that one >>>>> >> function. So the actual load operation depends on when you create the >>>>> >> initial pointer variable (you can cast from X to generic, but not the >>>>> >> other way around). >>>>> >> >>>>> >> Which in the end means you can end up with load(generic_ptr) and only >>>>> >> following the chain up to it's creation (with function inlining in >>>>> >> mind) you know the actual memory target. >>>>> > >>>>> > >>>>>
Re: [Mesa-dev] [PATCH v3 06/19] RFC: nir/vtn: "raw" pointer support
On Sun, Mar 25, 2018 at 12:18 AM, Rob Clark <robdcl...@gmail.com> wrote: > On Fri, Mar 23, 2018 at 5:18 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: >> On Fri, Mar 23, 2018 at 2:15 PM, Karol Herbst <kher...@redhat.com> wrote: >>> >>> On Fri, Mar 23, 2018 at 10:07 PM, Jason Ekstrand <ja...@jlekstrand.net> >>> wrote: >>> > +list >>> > >>> > On Fri, Mar 23, 2018 at 1:45 PM, Karol Herbst <kher...@redhat.com> >>> > wrote: >>> >> >>> >> On Fri, Mar 23, 2018 at 9:30 PM, Jason Ekstrand <ja...@jlekstrand.net> >>> >> wrote: >>> >> > As I've been rewriting core NIR deref handling, I've been thinking >>> >> > about >>> >> > this problem quite a bit. One objective I have is to actually make >>> >> > UBO >>> >> > and >>> >> > SSBO access go through derefs instead of just being an offset and >>> >> > index >>> >> > so >>> >> > that the compiler can better reason about them. In particular, I >>> >> > want >>> >> > to be >>> >> > able to start doing load/store elimination on SSBOs, SLM, and >>> >> > whatever >>> >> > CL >>> >> > has which would be great for everyone's compute performance (GL, >>> >> > Vulkan, >>> >> > CL, >>> >> > etc.). >>> >> > >>> >> > I would be lying if I said I had a full plan but I do have part of a >>> >> > plan. >>> >> > In my patch which adds the deref instructions, I add a new "cast" >>> >> > deref >>> >> > type >>> >> > which takes an arbitrary value as it's source and kicks out a deref >>> >> > with >>> >> > a >>> >> > type. Whenever we discover that the source of the cast is actually >>> >> > another >>> >> > deref which is compatible (same type etc.), copy propagation gets rid >>> >> > of >>> >> > the >>> >> > cast for you. The idea is that, instead of doing a >>> >> > load_raw(raw_ptr), >>> >> > you >>> >> > would do a load((type *)raw_ptr). >>> >> > >>> >> > Right now, most of the core NIR optimizations will throw a fit if >>> >> > they >>> >> > ever >>> >> > see a cast. 
This is intentional because it requires us to manually >>> >> > go >>> >> > through and handle casts. This would mean that, at the moment, you >>> >> > would >>> >> > have to lower to load_raw intrinsics almost immediately after coming >>> >> > out >>> >> > of >>> >> > SPIR-V. >>> >> > >>> >> >>> >> Well it gets more fun with OpenCL 2.0 where you can have generic >>> >> pointer where you only know the type at creation type. You can also >>> >> declare generic pointers as function inputs in a way, that you never >>> >> actually know from where you have to load if you only have that one >>> >> function. So the actual load operation depends on when you create the >>> >> initial pointer variable (you can cast from X to generic, but not the >>> >> other way around). >>> >> >>> >> Which in the end means you can end up with load(generic_ptr) and only >>> >> following the chain up to it's creation (with function inlining in >>> >> mind) you know the actual memory target. >>> > >>> > >>> > Yup. And there will always be crazy cases where you can't actually >>> > follow >>> > it and you have to emit a pile of code to load different ways depending >>> > on >>> > some bits somewhere that tell you how to load it. I'm well aware of the >>> > insanity. :-) This is part of the reason why I'm glad I'm not trying to >>> > write an OpenCL 2.0 driver. >>> > >>> > This insanity is exactly why I'm suggesting the pointer casting. Sure, >>> > you >>> > may not know the data type until the actual load. In that case, you end >>> > up >>> > with the cast being right before the load. If you don't know the >>> > storage >>>
Re: [Mesa-dev] [PATCH v3 06/19] RFC: nir/vtn: "raw" pointer support
On Fri, Mar 23, 2018 at 10:07 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > +list > > On Fri, Mar 23, 2018 at 1:45 PM, Karol Herbst <kher...@redhat.com> wrote: >> >> On Fri, Mar 23, 2018 at 9:30 PM, Jason Ekstrand <ja...@jlekstrand.net> >> wrote: >> > As I've been rewriting core NIR deref handling, I've been thinking about >> > this problem quite a bit. One objective I have is to actually make UBO >> > and >> > SSBO access go through derefs instead of just being an offset and index >> > so >> > that the compiler can better reason about them. In particular, I want >> > to be >> > able to start doing load/store elimination on SSBOs, SLM, and whatever >> > CL >> > has which would be great for everyone's compute performance (GL, Vulkan, >> > CL, >> > etc.). >> > >> > I would be lying if I said I had a full plan but I do have part of a >> > plan. >> > In my patch which adds the deref instructions, I add a new "cast" deref >> > type >> > which takes an arbitrary value as it's source and kicks out a deref with >> > a >> > type. Whenever we discover that the source of the cast is actually >> > another >> > deref which is compatible (same type etc.), copy propagation gets rid of >> > the >> > cast for you. The idea is that, instead of doing a load_raw(raw_ptr), >> > you >> > would do a load((type *)raw_ptr). >> > >> > Right now, most of the core NIR optimizations will throw a fit if they >> > ever >> > see a cast. This is intentional because it requires us to manually go >> > through and handle casts. This would mean that, at the moment, you >> > would >> > have to lower to load_raw intrinsics almost immediately after coming out >> > of >> > SPIR-V. >> > >> >> Well it gets more fun with OpenCL 2.0 where you can have generic >> pointer where you only know the type at creation type. You can also >> declare generic pointers as function inputs in a way, that you never >> actually know from where you have to load if you only have that one >> function. 
So the actual load operation depends on when you create the >> initial pointer variable (you can cast from X to generic, but not the >> other way around). >> >> Which in the end means you can end up with load(generic_ptr) and only >> following the chain up to it's creation (with function inlining in >> mind) you know the actual memory target. > > > Yup. And there will always be crazy cases where you can't actually follow > it and you have to emit a pile of code to load different ways depending on > some bits somewhere that tell you how to load it. I'm well aware of the > insanity. :-) This is part of the reason why I'm glad I'm not trying to > write an OpenCL 2.0 driver. > > This insanity is exactly why I'm suggesting the pointer casting. Sure, you > may not know the data type until the actual load. In that case, you end up > with the cast being right before the load. If you don't know the storage > class, maybe you have to switch and do multiple casts based on some bits. > Alternatively, if you don't know the storage class, we can just let the > deref mode be 0 for "I don't know". or maybe multiple bits for "these are > the things it might be". In any case, I think we can handle it. > there shouldn't be a situation where we don't know, except when you don't inline all functions. I think Rob had the idea of fat pointers where a pointer is a vec2 and the 2nd component contains the actual pointer type and you end up with a switch over the type to get the correct storage class. And if the compiler inlines all functions, it should be able to optimize that switch away. > It's insane but we need some sort of structure to be able to reason about > the insanity. Immediately lowering everything to load_raw is a good way to > get a driver off the ground. What it's not so good for is making an > optimizing compiler that can reason about these crazy pointers and actually > optimize them. 
Lest I sound too negative, I'm 100% fine with taking a short > path to getting something working now so long as it doesn't cloud up our > ability to do better in the future. > >> >> And I think the issue here is not that it is some kind of raw pointer >> in the patch, but more like an unbound/physical pointer, which doesn't >> relate to any variable. It is just a value like any other int/long as >> well. >> >> > On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst <
Re: [Mesa-dev] [PATCH v3 15/19] nir: use load_local_group_size
On Fri, Mar 23, 2018 at 9:18 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst <kher...@redhat.com> wrote: >> >> From: Rob Clark <robdcl...@gmail.com> >> >> If local_size is not known at compile time, which is the case with >> clover, use the load_local_group_size intrinsic instead. >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> --- >> src/compiler/nir/nir_lower_system_values.c | 25 + >> 1 file changed, 17 insertions(+), 8 deletions(-) >> >> diff --git a/src/compiler/nir/nir_lower_system_values.c >> b/src/compiler/nir/nir_lower_system_values.c >> index d507c28f421..ff4e09c8e61 100644 >> --- a/src/compiler/nir/nir_lower_system_values.c >> +++ b/src/compiler/nir/nir_lower_system_values.c >> @@ -57,19 +57,28 @@ convert_block(nir_block *block, nir_builder *b) >>*"The value of gl_GlobalInvocationID is equal to >>*gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID" >>*/ >> + nir_ssa_def *local_size_def; >> >> - nir_const_value local_size; >> - memset(_size, 0, sizeof(local_size)); >> - local_size.u64[0] = b->shader->info.cs.local_size[0]; >> - local_size.u64[1] = b->shader->info.cs.local_size[1]; >> - local_size.u64[2] = b->shader->info.cs.local_size[2]; >> + /* if local_size[] is already known, use that, otherwise use >> + * load_local_group_size intrinsic: >> + */ >> + if (b->shader->info.cs.local_size[0]) { >> +nir_const_value local_size; >> +memset(_size, 0, sizeof(local_size)); >> +local_size.u64[0] = b->shader->info.cs.local_size[0]; >> +local_size.u64[1] = b->shader->info.cs.local_size[1]; >> +local_size.u64[2] = b->shader->info.cs.local_size[2]; >> + >> +local_size_def = nir_build_imm(b, 3, bit_size, local_size); >> >> + } else { >> +local_size_def = nir_load_local_group_size(b, bit_size); >> + } > > > I commented on an earlier patch about how the approach to building the > 32/64-bit immediates is wrong. > oh right, I totally forgot about that. 
> Setting that aside, this patch looks fine to me in principle. There's a > part of me that doesn't like using cs.local_size[0] being the trigger but I > think it's probably ok. Maybe we should assert that cs_local_size is either > all zero (second case) or all not zero (first case) just to be safe. > I think the main problem here is that even with OpenCL kernels you can specify it, but then overwrite it at runtime again. So yes, I agree that we need something better here. >> >> >> nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size); >> nir_ssa_def *local_id = nir_load_local_invocation_id(b, >> bit_size); >> >> - sysval = nir_iadd(b, nir_imul(b, group_id, >> - nir_build_imm(b, 3, bit_size, >> local_size)), >> - local_id); >> + sysval = nir_iadd(b, nir_imul(b, group_id, local_size_def), >> + local_id); >> break; >>} >> >> -- >> 2.14.3 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 12/19] nir: specify bit_size when loading system values
On Fri, Mar 23, 2018 at 9:15 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst <kher...@redhat.com> wrote: >> >> With OpenCL the size of some system value depends on the Physical model >> choosen, so we need a way to load any system value as 32 or 64 bit. >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> --- >> src/compiler/nir/nir_builder.h | 10 +--- >> src/compiler/nir/nir_lower_alpha_test.c | 2 +- >> src/compiler/nir/nir_lower_clip.c| 3 ++- >> src/compiler/nir/nir_lower_subgroups.c | 8 +++--- >> src/compiler/nir/nir_lower_system_values.c | 31 >> >> src/compiler/nir/nir_lower_two_sided_color.c | 2 +- >> src/compiler/nir/nir_lower_wpos_center.c | 2 +- >> src/compiler/spirv/vtn_subgroup.c| 2 +- >> src/gallium/auxiliary/nir/tgsi_to_nir.c | 3 ++- >> src/intel/blorp/blorp_blit.c | 2 +- >> src/intel/blorp/blorp_clear.c| 2 +- >> src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 6 ++--- >> src/mesa/drivers/dri/i965/brw_tcs.c | 2 +- >> 13 files changed, 40 insertions(+), 35 deletions(-) >> >> diff --git a/src/compiler/nir/nir_builder.h >> b/src/compiler/nir/nir_builder.h >> index 36e0ae3ac63..4e93cd08169 100644 >> --- a/src/compiler/nir/nir_builder.h >> +++ b/src/compiler/nir/nir_builder.h >> @@ -612,13 +612,14 @@ nir_copy_var(nir_builder *build, nir_variable *dest, >> nir_variable *src) >> >> /* Generic builder for system values. 
*/ >> static inline nir_ssa_def * >> -nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) >> +nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index, >> + unsigned bit_size) >> { >> nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, >> op); >> load->num_components = nir_intrinsic_infos[op].dest_components; >> load->const_index[0] = index; >> nir_ssa_dest_init(>instr, >dest, >> - nir_intrinsic_infos[op].dest_components, 32, NULL); >> + nir_intrinsic_infos[op].dest_components, bit_size, >> NULL); >> nir_builder_instr_insert(build, >instr); >> return >dest.ssa; >> } >> @@ -630,9 +631,10 @@ nir_load_system_value(nir_builder *build, >> nir_intrinsic_op op, int index) >> >> #define DEFINE_SYSTEM_VALUE(name) >> \ >> static inline nir_ssa_def * >> \ >> - nir_load_##name(nir_builder *build) >> \ >> + nir_load_##name(nir_builder *build, unsigned bit_size) >> \ > > > I was really hoping that this change wouldn't touch every single intrinsic > helper. Maybe with Rob's python-based intrinsics table we can do something > better. 
> I was kind of thinking of declaring builtins as either 32, 64 or 32/64 bit and just generating a function with a bit_size argument for the latter, maybe, but I think we really want to do that in python and not with C preprocessor macros :) >> >> { >> \ >> - return nir_load_system_value(build, nir_intrinsic_load_##name, 0); >> \ >> + return nir_load_system_value(build, nir_intrinsic_load_##name, 0, >> \ >> + bit_size); >> \ >> } >> >> #include "nir_intrinsics.h" >> diff --git a/src/compiler/nir/nir_lower_alpha_test.c >> b/src/compiler/nir/nir_lower_alpha_test.c >> index 6bf9ff142df..29f91ab9428 100644 >> --- a/src/compiler/nir/nir_lower_alpha_test.c >> +++ b/src/compiler/nir/nir_lower_alpha_test.c >> @@ -92,7 +92,7 @@ nir_lower_alpha_test(nir_shader *shader, enum >> compare_func func, >> >> nir_ssa_def *condition = >>nir_compare_func(, func, >> - alpha, nir_load_alpha_ref_float()); >> + alpha, nir_load_alpha_ref_float(, >> 32)); >> >> nir_intrinsic_instr *discard = >>nir_intrinsic_instr_create(b.shader, >> diff --git a/src/compiler/nir/nir_lower_clip.c >> b/src/compiler/nir/nir_lower_clip.c >> index ea12f51a7bb..b9a91f7d40b 100644 >> --- a/src/compiler/nir/nir_lower_clip.c >> +++ b/src/compiler/nir/nir_lower_clip.c >> @@ -174,7 +174,8 @@ lower_clip_vs(nir_function_impl *impl, unsigned >> ucp_enables, >> for (int plane = 0; plane < MAX_CLIP_P
[Mesa-dev] [PATCH v3 16/19] nir: add load_kernel_param
OpenCL kernels have parameters (see pipe_grid_info::input), and so we need a way to access them. The offset source is the offset of the parameter to load in the kernel input buffer. v2: improve commit message remove BASE split lower_io changes into separate commit Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/nir/nir_intrinsics.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 6597eaea87b..fb8d53b3c0d 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -495,6 +495,8 @@ LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REOR LOAD(input, 1, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { vertex, offset }. const_index[] = { base, component } */ LOAD(per_vertex_input, 2, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { address }. No const_index */ +LOAD(kernel_param, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { barycoord, offset }. const_index[] = { base, component } */ INTRINSIC(load_interpolated_input, 2, ARR(2, 1), true, 0, 0, 2, BASE, COMPONENT, xx, -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 15/19] nir: use load_local_group_size
From: Rob Clark <robdcl...@gmail.com> If local_size is not known at compile time, which is the case with clover, use the load_local_group_size intrinsic instead. Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/nir/nir_lower_system_values.c | 25 + 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index d507c28f421..ff4e09c8e61 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -57,19 +57,28 @@ convert_block(nir_block *block, nir_builder *b) *"The value of gl_GlobalInvocationID is equal to *gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID" */ + nir_ssa_def *local_size_def; - nir_const_value local_size; - memset(_size, 0, sizeof(local_size)); - local_size.u64[0] = b->shader->info.cs.local_size[0]; - local_size.u64[1] = b->shader->info.cs.local_size[1]; - local_size.u64[2] = b->shader->info.cs.local_size[2]; + /* if local_size[] is already known, use that, otherwise use + * load_local_group_size intrinsic: + */ + if (b->shader->info.cs.local_size[0]) { +nir_const_value local_size; +memset(_size, 0, sizeof(local_size)); +local_size.u64[0] = b->shader->info.cs.local_size[0]; +local_size.u64[1] = b->shader->info.cs.local_size[1]; +local_size.u64[2] = b->shader->info.cs.local_size[2]; + +local_size_def = nir_build_imm(b, 3, bit_size, local_size); + } else { +local_size_def = nir_load_local_group_size(b, bit_size); + } nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size); nir_ssa_def *local_id = nir_load_local_invocation_id(b, bit_size); - sysval = nir_iadd(b, nir_imul(b, group_id, - nir_build_imm(b, 3, bit_size, local_size)), - local_id); + sysval = nir_iadd(b, nir_imul(b, group_id, local_size_def), + local_id); break; } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 17/19] RFC nir/lower_io: lower kernel entry param load_vars to load_kernel_param
For OpenCL kernels we have an input buffer where most of the parameters are stored. For this we have to keep track of alignment and padding rules to correctly identify the offset of each parameter inside that buffer. For this we can just rely on the new cl_size and cl_alignment glsl_type functions. Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/nir/nir_lower_io.c | 39 --- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index df91febd68d..ed8e361651c 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -39,6 +39,7 @@ struct lower_io_state { int (*type_size)(const struct glsl_type *type); nir_variable_mode modes; nir_lower_io_options options; + unsigned *offsets; }; void @@ -159,7 +160,8 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, nir_ssa_def *vertex_index, nir_ssa_def *offset, unsigned component) { - const nir_shader *nir = state->builder.shader; + nir_builder *b = >builder; + nir_shader *nir = b->shader; nir_variable *var = intrin->variables[0]->var; nir_variable_mode mode = var->data.mode; nir_ssa_def *barycentric = NULL; @@ -199,6 +201,11 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, case nir_var_shared: op = nir_intrinsic_load_shared; break; + case nir_var_param: + if (nir_cf_node_get_function(>instr.block->cf_node) == nir_shader_get_entrypoint(nir)) { + op = nir_intrinsic_load_kernel_param; + break; + } default: unreachable("Unknown variable mode"); } @@ -207,7 +214,9 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, nir_intrinsic_instr_create(state->builder.shader, op); load->num_components = intrin->num_components; - nir_intrinsic_set_base(load, var->data.driver_location); + if (op != nir_intrinsic_load_kernel_param) + nir_intrinsic_set_base(load, var->data.driver_location); + if (mode == nir_var_shader_in || mode == nir_var_shader_out) 
nir_intrinsic_set_component(load, component); @@ -220,6 +229,8 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, } else if (barycentric) { load->src[0] = nir_src_for_ssa(barycentric); load->src[1] = nir_src_for_ssa(offset); + } else if (op == nir_intrinsic_load_kernel_param) { + load->src[0] = nir_src_for_ssa(nir_imm_int(b, state->offsets[var->data.location])); } else { load->src[0] = nir_src_for_ssa(offset); } @@ -407,7 +418,8 @@ nir_lower_io_block(nir_block *block, if (mode != nir_var_shader_in && mode != nir_var_shader_out && mode != nir_var_shared && - mode != nir_var_uniform) + mode != nir_var_uniform && + mode != nir_var_param) continue; b->cursor = nir_before_instr(instr); @@ -481,6 +493,22 @@ nir_lower_io_block(nir_block *block, return progress; } +static void +nir_lower_io_calc_param_offsets(struct lower_io_state *state, +nir_function_impl *impl) +{ + state->offsets = ralloc_array(state->builder.shader, unsigned, + impl->num_params); + state->offsets[0] = 0; + for (int i = 0; i < impl->num_params; ++i) { + nir_variable *var = impl->params[i]; + state->offsets[i] = align(state->offsets[i], glsl_get_cl_alignment(var->type)); + if (i + 1 < impl->num_params) + state->offsets[i + 1] = state->offsets[i] + glsl_get_cl_size(var->type); + } + ralloc_free(state->offsets); +} + static bool nir_lower_io_impl(nir_function_impl *impl, nir_variable_mode modes, @@ -495,6 +523,11 @@ nir_lower_io_impl(nir_function_impl *impl, state.type_size = type_size; state.options = options; + if (modes & nir_var_param && + impl == nir_shader_get_entrypoint(state.builder.shader) && + impl->num_params) + nir_lower_io_calc_param_offsets(, impl); + nir_foreach_block(block, impl) { progress |= nir_lower_io_block(block, ); } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 19/19] RFC: nir/vtn: member in struct deref
Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/spirv/vtn_private.h | 5 +++-- src/compiler/spirv/vtn_variables.c | 14 +++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h index 510c12faa87..45b581bf80e 100644 --- a/src/compiler/spirv/vtn_private.h +++ b/src/compiler/spirv/vtn_private.h @@ -700,12 +700,13 @@ void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, nir_deref_var *dest); struct vtn_ssa_value *vtn_pointer_load(struct vtn_builder *b, - struct vtn_pointer *ptr); + struct vtn_pointer *ptr, + struct vtn_type *); void vtn_pointer_store(struct vtn_builder *b, struct vtn_ssa_value *src, struct vtn_pointer *ptr); struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src); +vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src, struct vtn_type *); void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, struct vtn_pointer *dest); diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 6cf1a63f8c9..76b38b85e80 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -739,9 +739,9 @@ vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, } struct vtn_ssa_value * -vtn_pointer_load(struct vtn_builder *b, struct vtn_pointer *ptr) +vtn_pointer_load(struct vtn_builder *b, struct vtn_pointer *ptr, struct vtn_type *res_type) { - const struct glsl_type *type = ptr->type->type; + const struct glsl_type *type = res_type->type; struct vtn_ssa_value *val = vtn_create_ssa_value(b, type); nir_intrinsic_op op = nir_intrinsic_load_global; @@ -1207,7 +1207,7 @@ _vtn_variable_load_store(struct vtn_builder *b, bool load, * with it. 
Just directly generate load/store_global intrinsics: */ if (load) { -*inout = vtn_pointer_load(b, ptr); +*inout = vtn_pointer_load(b, ptr, ptr->type); } else { vtn_pointer_store(b, *inout, ptr); } @@ -1244,12 +1244,12 @@ _vtn_variable_load_store(struct vtn_builder *b, bool load, } struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src) +vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src, struct vtn_type *type) { if (vtn_pointer_is_external_block(b, src)) { return vtn_block_load(b, src); } else if (!src->var) { - return vtn_pointer_load(b, src); + return vtn_pointer_load(b, src, type); } else { struct vtn_ssa_value *val = NULL; _vtn_variable_load_store(b, true, src, ); @@ -1298,7 +1298,7 @@ _vtn_variable_copy(struct vtn_builder *b, struct vtn_pointer *dest, * ensure that matrices get loaded in the optimal way even if they * are storred row-major in a UBO. */ - vtn_variable_store(b, vtn_variable_load(b, src), dest); + vtn_variable_store(b, vtn_variable_load(b, src, src->type), dest); return; case GLSL_TYPE_ARRAY: @@ -2322,7 +2322,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, return; } - vtn_push_ssa(b, w[2], res_type, vtn_variable_load(b, src)); + vtn_push_ssa(b, w[2], res_type, vtn_variable_load(b, src, res_type)); break; } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 14/19] nir/vtn/opencl: support fma
Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/spirv/vtn_opencl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compiler/spirv/vtn_opencl.c b/src/compiler/spirv/vtn_opencl.c index 3c5ecd22452..723a7edf9c2 100644 --- a/src/compiler/spirv/vtn_opencl.c +++ b/src/compiler/spirv/vtn_opencl.c @@ -58,6 +58,7 @@ static nir_op nir_alu_op_for_opencl_opcode(struct vtn_builder *b, enum OpenCLstd opcode) { switch (opcode) { + case Fma: return nir_op_ffma; case SHadd: return nir_op_ihadd; case UHadd: return nir_op_uhadd; default: @@ -236,6 +237,7 @@ vtn_handle_opencl_instruction(struct vtn_builder *b, uint32_t ext_opcode, switch (ext_opcode) { case SHadd: case UHadd: + case Fma: handle_instr(b, ext_opcode, w, count, handle_alu); return true; case Vloadn: -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 09/19] nir/vtn: initial OpenCL.std extension
From: Rob Clark <robdcl...@gmail.com> Not complete, mostly just adding things as I encounter them in CTS. But not getting far enough yet to hit most of the OpenCL.std instructions. v2: update hadd definition (Karol Herbst <kher...@redhat.com>) Signed-off-by: Rob Clark <robdcl...@gmail.com> Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/nir/meson.build | 1 + src/compiler/nir/nir_opcodes.py | 3 +- src/compiler/spirv/spirv_to_nir.c | 2 + src/compiler/spirv/vtn_opencl.c | 266 ++ src/compiler/spirv/vtn_private.h | 3 + 5 files changed, 274 insertions(+), 1 deletion(-) create mode 100644 src/compiler/spirv/vtn_opencl.c diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index a70c236b958..213a139a1b8 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -192,6 +192,7 @@ files_libnir = files( '../spirv/vtn_amd.c', '../spirv/vtn_cfg.c', '../spirv/vtn_glsl450.c', + '../spirv/vtn_opencl.c', '../spirv/vtn_private.h', '../spirv/vtn_subgroup.c', '../spirv/vtn_variables.c', diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 65d13200624..86fd6b6d68e 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -768,4 +768,5 @@ dst.z = src2.x; dst.w = src3.x; """) - +binop("ihadd", tint, commutative, "(src0 >> 1) + (src1 >> 1) + (src0 & src1 & 1)") +binop("uhadd", tuint, commutative, "(src0 >> 1) + (src1 >> 1) + (src0 & src1 & 1)") diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 3acb3fc0b42..6a16d77a771 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -379,6 +379,8 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, } else if ((strcmp((const char *)[2], "SPV_AMD_gcn_shader") == 0) && (b->options && b->options->caps.gcn_shader)) { val->ext_handler = vtn_handle_amd_gcn_shader_instruction; + } else if (strcmp(ext, "OpenCL.std") == 0) { + val->ext_handler = 
vtn_handle_opencl_instruction; } else { vtn_fail("Unsupported extension: %s", ext); } diff --git a/src/compiler/spirv/vtn_opencl.c b/src/compiler/spirv/vtn_opencl.c new file mode 100644 index 000..3c5ecd22452 --- /dev/null +++ b/src/compiler/spirv/vtn_opencl.c @@ -0,0 +1,266 @@ +/* + * Copyright © 2018 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + *Rob Clark (robdcl...@gmail.com) + */ + +#include "vtn_private.h" +#include "OpenCL.std.h" + +typedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b, enum OpenCLstd opcode, +unsigned num_srcs, nir_ssa_def **srcs); + +static void +handle_instr(struct vtn_builder *b, enum OpenCLstd opcode, const uint32_t *w, + unsigned count, nir_handler handler) +{ + const struct glsl_type *dest_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + + unsigned num_srcs = count - 5; + nir_ssa_def *srcs[3] = { NULL, }; + vtn_assert(num_srcs <= ARRAY_SIZE(srcs)); + for (unsigned i = 0; i < num_srcs; i++) { + srcs[i] = vtn_ssa_value(b, w[i + 5])->def; + } + + nir_ssa_def *result = handler(b, opcode, num_srcs, srcs); + if (result) { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_create_ssa_value(b, dest_type); + val->ssa->def = result; + } else { + vtn_assert(dest_type == glsl_void_type()); + } +} + +static nir_op +nir_alu_op_for_opencl_opcode(struct vtn_builder *b, enum OpenCLstd opc
[Mesa-dev] [PATCH v3 18/19] nir: kernel entrypoints can have arguments
From: Rob Clark <robdcl...@gmail.com> This assert is not valid for OpenCL kernels. TODO: can we somehow assert conditionally, based on GLSL vs. CL? Signed-off-by: Rob Clark <robdcl...@gmail.com> Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/nir/nir.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 6a51b7c4ab1..fedda73aa5e 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1974,7 +1974,6 @@ nir_shader_get_entrypoint(nir_shader *shader) struct exec_node *func_node = exec_list_get_head(&shader->functions); nir_function *func = exec_node_data(nir_function, func_node, node); assert(func->return_type == glsl_void_type()); - assert(func->num_params == 0); assert(func->impl); return func->impl; } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 10/19] RFC: nir/vtn: handle constant builtins from kernels
With SPIR-V it is perfectly fine to declare builtins as constants and have no constant initializer on them. This change seems to be able to break Vulkan shaders, so please check if this is the correct thing here. Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/spirv/vtn_variables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index af9222d6f4e..80fca6e8a32 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -1580,7 +1580,6 @@ apply_var_decoration(struct vtn_builder *b, nir_variable *nir_var, nir_var->data.invariant = true; break; case SpvDecorationConstant: - vtn_assert(nir_var->constant_initializer != NULL); nir_var->data.read_only = true; break; case SpvDecorationNonReadable: @@ -2031,6 +2030,7 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val, case vtn_variable_mode_global: case vtn_variable_mode_image: case vtn_variable_mode_sampler: + case vtn_variable_mode_const: /* For these, we create the variable normally */ var->var = rzalloc(b->shader, nir_variable); var->var->name = ralloc_strdup(var->var, val->name); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 06/19] RFC: nir/vtn: "raw" pointer support
From: Rob Clark <robdcl...@gmail.com> An attempt to add physical pointer support to vtn. I'm not totally happy about the handling of logical pointers vs physical pointers. So this is really more of an RFS (request for suggestions) v2: treat vec3 types as vec4 when dereferencing Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/spirv/spirv_to_nir.c | 87 --- src/compiler/spirv/vtn_private.h | 20 ++- src/compiler/spirv/vtn_variables.c | 300 - 3 files changed, 347 insertions(+), 60 deletions(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 334bcab9a82..d58a68f80ef 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -572,6 +572,7 @@ vtn_types_compatible(struct vtn_builder *b, vtn_types_compatible(b, t1->array_element, t2->array_element); case vtn_base_type_pointer: + case vtn_base_type_raw_pointer: return vtn_types_compatible(b, t1->deref, t2->deref); case vtn_base_type_struct: @@ -609,6 +610,7 @@ vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) case vtn_base_type_matrix: case vtn_base_type_array: case vtn_base_type_pointer: + case vtn_base_type_raw_pointer: case vtn_base_type_image: case vtn_base_type_sampler: case vtn_base_type_sampled_image: @@ -939,6 +941,14 @@ vtn_type_layout_std430(struct vtn_builder *b, struct vtn_type *type, return type; } + case vtn_base_type_raw_pointer: { + uint32_t comp_size = b->ptr_size / 8; + vtn_assert(comp_size); + *size_out = comp_size; + *align_out = comp_size; + return type; + } + case vtn_base_type_vector: { uint32_t comp_size = glsl_get_bit_size(type->type) / 8; assert(type->length > 0 && type->length <= 4); @@ -1003,6 +1013,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type->base_type = vtn_base_type_scalar; val->type->type = glsl_bool_type(); val->type->length = 1; + val->type->stride = 4; break; case SpvOpTypeInt: { int bit_size = w[2]; @@ -1025,6 +1036,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp 
opcode, vtn_fail("Invalid int bit size"); } val->type->length = 1; + val->type->stride = bit_size / 8; break; } @@ -1045,6 +1057,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, vtn_fail("Invalid float bit size"); } val->type->length = 1; + val->type->stride = bit_size / 8; break; } @@ -1061,6 +1074,10 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems); val->type->length = elems; val->type->stride = glsl_get_bit_size(base->type) / 8; + /* special case: vec3 is aligned to vec4 */ + if (elems == 3) + elems = 4; + val->type->stride *= elems; val->type->array_element = base; break; } @@ -1138,7 +1155,11 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, const char *name = val->name ? val->name : "struct"; - val->type->type = glsl_struct_type(fields, num_fields, name, false); + val->type->type = glsl_struct_type(fields, num_fields, name, + val->type->packed); + // TODO stride for a struct only matters for kernel shaders, where + // cl_size is the right thing.. but still a bit ugly to hard-code. + val->type->stride = glsl_get_cl_size(val->type->type); break; } @@ -1167,25 +1188,47 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type->storage_class = storage_class; val->type->deref = deref_type; - if (storage_class == SpvStorageClassUniform || - storage_class == SpvStorageClassStorageBuffer) { - /* These can actually be stored to nir_variables and used as SSA - * values so they need a real glsl_type. - */ - val->type->type = glsl_vector_type(GLSL_TYPE_UINT, 2); - } - - if (storage_class == SpvStorageClassWorkgroup && - b->options->lower_workgroup_access_to_offsets) { + // XXX handling the "fake" glsl pointers vs "raw" pointers in kernel + // is a bit ugly.. 
need to understand how "pointers" are used in vk + // and figure out something better + if (storage_class == SpvStorageClassFunction || + storage_class == SpvStorageClassUniformConstant || + storage_class == SpvStorageClassWorkgroup || + !b->kernel_mode) { + if (storage_class == SpvStorageClassUniform || + storage_class ==
[Mesa-dev] [PATCH v3 08/19] nir/vtn: import OpenCL.std.h
From: Rob Clark <robdcl...@gmail.com> Lightly edited to be valid 'C' code. Is there a bug open to fix this upstream? Signed-off-by: Rob Clark <robdcl...@gmail.com> Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/spirv/OpenCL.std.h | 211 1 file changed, 211 insertions(+) create mode 100644 src/compiler/spirv/OpenCL.std.h diff --git a/src/compiler/spirv/OpenCL.std.h b/src/compiler/spirv/OpenCL.std.h new file mode 100644 index 000..1e9e7fc8d8a --- /dev/null +++ b/src/compiler/spirv/OpenCL.std.h @@ -0,0 +1,211 @@ +/* +** Copyright (c) 2015-2017 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/ + +#ifndef OpenCLstd_H +#define OpenCLstd_H + +enum OpenCLstd { + +// Section 2.1: Math extended instructions +Acos = 0, +Acosh = 1, +Acospi = 2, +Asin = 3, +Asinh = 4, +Asinpi = 5, +Atan = 6, +Atan2 = 7, +Atanh = 8, +Atanpi = 9, +Atan2pi = 10, +Cbrt = 11, +Ceil = 12, +Copysign = 13, +Cos = 14, +Cosh = 15, +Cospi = 16, +Erfc = 17, +Erf = 18, +Exp = 19, +Exp2 = 20, +Exp10 = 21, +Expm1 = 22, +Fabs = 23, +Fdim = 24, +Floor = 25, +Fma = 26, +Fmax = 27, +Fmin = 28, +Fmod = 29, +Fract = 30, +Frexp = 31, +Hypot = 32, +Ilogb = 33, +Ldexp = 34, +Lgamma = 35, +Lgamma_r = 36, +Log = 37, +Log2 = 38, +Log10 = 39, +Log1p = 40, +Logb = 41, +Mad = 42, +Maxmag = 43, +Minmag = 44, +Modf = 45, +Nan = 46, +Nextafter = 47, +Pow = 48, +Pown = 49, +Powr = 50, +Remainder = 51, +Remquo = 52, +Rint = 53, +Rootn = 54, +Round = 55, +Rsqrt = 56, +Sin = 57, +Sincos = 58, +Sinh = 59, +Sinpi = 60, +Sqrt = 61, +Tan = 62, +Tanh = 63, +Tanpi = 64, +Tgamma = 65, +Trunc = 66, +Half_cos = 67, +Half_divide = 68, +Half_exp = 69, +Half_exp2 = 70, +Half_exp10 = 71, +Half_log = 72, +Half_log2 = 73, +Half_log10 = 74, +Half_powr = 75, +Half_recip = 76, +Half_rsqrt = 77, +Half_sin = 78, +Half_sqrt = 79, +Half_tan = 80, +Native_cos = 81, +Native_divide = 82, +Native_exp = 83, +Native_exp2 = 84, +Native_exp10 = 85, +Native_log = 86, +Native_log2 = 87, +Native_log10 = 88, +Native_powr = 89, +Native_recip = 90, +Native_rsqrt = 91, +Native_sin = 92, +Native_sqrt = 93, +Native_tan = 94, + +// Section 2.2: Integer instructions +SAbs = 141, +SAbs_diff = 142, +SAdd_sat = 143, +UAdd_sat = 144, +SHadd = 145, +UHadd = 146, +SRhadd = 147, +URhadd = 148, +SClamp = 149, +UClamp = 150, +Clz = 151, +Ctz = 152, +SMad_hi = 153, +UMad_sat = 154, +SMad_sat = 155, +SMax = 156, +UMax = 157, +SMin = 158, +UMin = 159, +SMul_hi = 160, +Rotate = 161, +SSub_sat = 162, +USub_sat = 163, +U_Upsample = 164, +S_Upsample = 165, +Popcount = 166, +SMad24 = 167, +UMad24 = 168, +SMul24 = 169, +UMul24 = 170, +UAbs = 201, +UAbs_diff = 202, 
+UMul_hi = 203, +UMad_hi = 204, + +// Section 2.3: Common instructions +FClamp = 95, +Degrees = 96, +FMax_common = 97, +FMin_common = 98, +Mix = 99, +Radians = 100, +Step = 101, +Smoothstep = 102, +Sign = 103, + +// Section 2.4: Geometric instructions +Cross = 104, +Distance = 105, +Length = 106, +Normalize = 107, +Fast_distance = 108, +Fast_length = 109, +Fast_
[Mesa-dev] [PATCH v3 12/19] nir: specify bit_size when loading system values
With OpenCL, the size of some system values depends on the physical addressing model chosen, so we need a way to load any system value as 32 or 64 bit. Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/nir/nir_builder.h | 10 +--- src/compiler/nir/nir_lower_alpha_test.c | 2 +- src/compiler/nir/nir_lower_clip.c| 3 ++- src/compiler/nir/nir_lower_subgroups.c | 8 +++--- src/compiler/nir/nir_lower_system_values.c | 31 src/compiler/nir/nir_lower_two_sided_color.c | 2 +- src/compiler/nir/nir_lower_wpos_center.c | 2 +- src/compiler/spirv/vtn_subgroup.c| 2 +- src/gallium/auxiliary/nir/tgsi_to_nir.c | 3 ++- src/intel/blorp/blorp_blit.c | 2 +- src/intel/blorp/blorp_clear.c| 2 +- src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 6 ++--- src/mesa/drivers/dri/i965/brw_tcs.c | 2 +- 13 files changed, 40 insertions(+), 35 deletions(-) diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 36e0ae3ac63..4e93cd08169 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -612,13 +612,14 @@ nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src) /* Generic builder for system values.
*/ static inline nir_ssa_def * -nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) +nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index, + unsigned bit_size) { nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op); load->num_components = nir_intrinsic_infos[op].dest_components; load->const_index[0] = index; nir_ssa_dest_init(>instr, >dest, - nir_intrinsic_infos[op].dest_components, 32, NULL); + nir_intrinsic_infos[op].dest_components, bit_size, NULL); nir_builder_instr_insert(build, >instr); return >dest.ssa; } @@ -630,9 +631,10 @@ nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) #define DEFINE_SYSTEM_VALUE(name)\ static inline nir_ssa_def * \ - nir_load_##name(nir_builder *build) \ + nir_load_##name(nir_builder *build, unsigned bit_size)\ { \ - return nir_load_system_value(build, nir_intrinsic_load_##name, 0); \ + return nir_load_system_value(build, nir_intrinsic_load_##name, 0, \ + bit_size);\ } #include "nir_intrinsics.h" diff --git a/src/compiler/nir/nir_lower_alpha_test.c b/src/compiler/nir/nir_lower_alpha_test.c index 6bf9ff142df..29f91ab9428 100644 --- a/src/compiler/nir/nir_lower_alpha_test.c +++ b/src/compiler/nir/nir_lower_alpha_test.c @@ -92,7 +92,7 @@ nir_lower_alpha_test(nir_shader *shader, enum compare_func func, nir_ssa_def *condition = nir_compare_func(, func, - alpha, nir_load_alpha_ref_float()); + alpha, nir_load_alpha_ref_float(, 32)); nir_intrinsic_instr *discard = nir_intrinsic_instr_create(b.shader, diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c index ea12f51a7bb..b9a91f7d40b 100644 --- a/src/compiler/nir/nir_lower_clip.c +++ b/src/compiler/nir/nir_lower_clip.c @@ -174,7 +174,8 @@ lower_clip_vs(nir_function_impl *impl, unsigned ucp_enables, for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) { if (ucp_enables & (1 << plane)) { nir_ssa_def *ucp = -nir_load_system_value(, nir_intrinsic_load_user_clip_plane, plane); 
+nir_load_system_value(, nir_intrinsic_load_user_clip_plane, + plane, 32); /* calculate clipdist[plane] - dot(ucp, cv): */ clipdist[plane] = nir_fdot4(, ucp, cv); diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c index 0d3c83b7951..7e910c013a9 100644 --- a/src/compiler/nir/nir_lower_subgroups.c +++ b/src/compiler/nir/nir_lower_subgroups.c @@ -190,7 +190,7 @@ static nir_ssa_def * lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin, bool lower_to_scalar) { - nir_ssa_def *index = nir_load_subgroup_invocation(b); + nir_ssa_def *index = nir_load_subgroup_invocation(b, 32); switch (intrin->intrinsic) { case nir_intrinsic_shuffle_xor: assert(intrin->src[1].is_ssa); @@ -300,7 +300,7 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin, assert(options->subgroup_size <= 64); uint64_t group_mask = ~0ull
[Mesa-dev] [PATCH v3 03/19] glsl: add packed for struct types
We need this for OpenCL kernels because we have to apply C rules for alignment and padding inside structs and for this we also have to know if a struct is packed or not. Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/glsl_types.cpp | 17 +++-- src/compiler/glsl_types.h | 12 ++-- src/compiler/nir_types.cpp| 5 +++-- src/compiler/nir_types.h | 3 ++- src/compiler/spirv/spirv_to_nir.c | 10 +- src/compiler/spirv/vtn_private.h | 7 +++ 6 files changed, 42 insertions(+), 12 deletions(-) diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index a73caa908bf..12a8a546938 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -90,11 +90,11 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, } glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, - const char *name) : + const char *name, bool packed) : gl_type(0), base_type(GLSL_TYPE_STRUCT), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), packed(packed), vector_elements(0), matrix_columns(0), length(num_fields) { @@ -1131,9 +1131,10 @@ glsl_type::record_key_hash(const void *a) const glsl_type * glsl_type::get_record_instance(const glsl_struct_field *fields, unsigned num_fields, - const char *name) + const char *name, + bool packed) { - const glsl_type key(fields, num_fields, name); + const glsl_type key(fields, num_fields, name, packed); mtx_lock(_type::hash_mutex); @@ -1145,7 +1146,7 @@ glsl_type::get_record_instance(const glsl_struct_field *fields, const struct hash_entry *entry = _mesa_hash_table_search(record_types, ); if (entry == NULL) { - const glsl_type *t = new glsl_type(fields, num_fields, name); + const glsl_type *t = new glsl_type(fields, num_fields, name, packed); entry = _mesa_hash_table_insert(record_types, t, (void *) t); } @@ -1153,6 +1154,7 @@ 
glsl_type::get_record_instance(const glsl_struct_field *fields, assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_STRUCT); assert(((glsl_type *) entry->data)->length == num_fields); assert(strcmp(((glsl_type *) entry->data)->name, name) == 0); + assert(((glsl_type *) entry->data)->packed == packed); mtx_unlock(_type::hash_mutex); @@ -2262,6 +2264,8 @@ encode_type_to_blob(struct blob *blob, const glsl_type *type) if (type->is_interface()) { blob_write_uint32(blob, type->interface_packing); blob_write_uint32(blob, type->interface_row_major); + } else { + blob_write_uint32(blob, type->packed); } return; case GLSL_TYPE_VOID: @@ -2341,7 +2345,8 @@ decode_type_from_blob(struct blob_reader *blob) t = glsl_type::get_interface_instance(fields, num_fields, packing, row_major, name); } else { - t = glsl_type::get_record_instance(fields, num_fields, name); + unsigned packed = blob_read_uint32(blob); + t = glsl_type::get_record_instance(fields, num_fields, name, packed); } free(fields); diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h index 6982d52e392..34d03505ae8 100644 --- a/src/compiler/glsl_types.h +++ b/src/compiler/glsl_types.h @@ -164,6 +164,13 @@ struct glsl_type { unsigned interface_packing:2; unsigned interface_row_major:1; + /** +* For \c GLSL_TYPE_STRUCT this specifies if the struct is packed or not. 
+* +* Only used for Compute kernels +*/ + unsigned packed:1; + private: glsl_type() : mem_ctx(NULL) { @@ -286,7 +293,8 @@ public: */ static const glsl_type *get_record_instance(const glsl_struct_field *fields, unsigned num_fields, - const char *name); + const char *name, + bool packed = false); /** * Get the instance of an interface block type @@ -867,7 +875,7 @@ private: /** Constructor for record types */ glsl_type(const glsl_struct_field *fields, unsigned num_fields, -const char *name); +const char *name, bool packed = false); /** Constructor for interface types */ glsl_type(const glsl_struct_field *fields, unsigned num_fields, diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index ee6b06aea63..76a9cf2fde7 100644 --- a/src/compiler/nir_types.cpp +++ b/sr
[Mesa-dev] [PATCH v3 07/19] nir/vtn: print extension name in fail msg
From: Rob Clark <robdcl...@gmail.com> Signed-off-by: Rob Clark <robdcl...@gmail.com> Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/spirv/spirv_to_nir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index d58a68f80ef..3acb3fc0b42 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -370,16 +370,17 @@ static void vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { + const char *ext = (const char *)&w[2]; switch (opcode) { case SpvOpExtInstImport: { struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); - if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { + if (strcmp(ext, "GLSL.std.450") == 0) { val->ext_handler = vtn_handle_glsl450_instruction; } else if ((strcmp((const char *)&w[2], "SPV_AMD_gcn_shader") == 0) && (b->options && b->options->caps.gcn_shader)) { val->ext_handler = vtn_handle_amd_gcn_shader_instruction; } else { - vtn_fail("Unsupported extension"); + vtn_fail("Unsupported extension: %s", ext); } break; } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 13/19] nir/vtn: Handle OpInBoundsPtrAccessChain
From: Rob Clark <robdcl...@gmail.com> Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/spirv/spirv_to_nir.c | 1 + src/compiler/spirv/vtn_variables.c | 1 + 2 files changed, 2 insertions(+) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 6a16d77a771..3b86aef2978 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -3732,6 +3732,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpCopyMemorySized: case SpvOpAccessChain: case SpvOpPtrAccessChain: + case SpvOpInBoundsPtrAccessChain: case SpvOpInBoundsAccessChain: case SpvOpArrayLength: vtn_handle_variables(b, opcode, w, count); diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 51f73b3cf8c..6cf1a63f8c9 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -2259,6 +2259,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvOpAccessChain: case SpvOpPtrAccessChain: + case SpvOpInBoundsPtrAccessChain: case SpvOpInBoundsAccessChain: { struct vtn_type *ptr_type = vtn_value(b, w[1], vtn_value_type_type)->type; struct vtn_value *base_val = vtn_untyped_value(b, w[3]); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 11/19] nir/vtn: pointers can point to cross_workgroup or local memory as well
Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/spirv/vtn_variables.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 80fca6e8a32..51f73b3cf8c 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -1917,7 +1917,9 @@ vtn_pointer_to_ssa(struct vtn_builder *b, struct vtn_pointer *ptr) ptr->mode == vtn_variable_mode_ssbo); return nir_vec2(>nb, ptr->block_index, ptr->offset); } else { - vtn_assert(ptr->mode == vtn_variable_mode_workgroup); + vtn_assert(ptr->mode == vtn_variable_mode_workgroup || + ptr->mode == vtn_variable_mode_cross_workgroup || + ptr->mode == vtn_variable_mode_local); return ptr->offset; } } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 02/19] vtn: handle SpvExecutionModelKernel
Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/spirv/spirv_to_nir.c | 3 +++ src/compiler/spirv/vtn_private.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 7ce7e9ba62e..edf02db584b 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -3178,6 +3178,9 @@ stage_for_execution_model(struct vtn_builder *b, SpvExecutionModel model) return MESA_SHADER_FRAGMENT; case SpvExecutionModelGLCompute: return MESA_SHADER_COMPUTE; + case SpvExecutionModelKernel: + b->kernel_mode = true; + return MESA_SHADER_COMPUTE; default: vtn_fail("Unsupported execution model"); } diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h index 70f660fbd48..9f5a22905f1 100644 --- a/src/compiler/spirv/vtn_private.h +++ b/src/compiler/spirv/vtn_private.h @@ -580,6 +580,8 @@ struct vtn_builder { unsigned func_param_idx; bool has_loop_continue; + + bool kernel_mode; }; nir_ssa_def * -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 04/19] glsl: add glsl_base_get_byte_size
Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/glsl_types.h | 34 ++ src/compiler/nir_types.h | 30 +- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h index 34d03505ae8..2e63261090e 100644 --- a/src/compiler/glsl_types.h +++ b/src/compiler/glsl_types.h @@ -1068,4 +1068,38 @@ glsl_align(unsigned int a, unsigned int align) return (a + align - 1) / align * align; } +static inline unsigned +glsl_base_get_byte_size(const enum glsl_base_type base_type) +{ + switch (base_type) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: /* TODO handle mediump */ + case GLSL_TYPE_SUBROUTINE: + return 4; + + case GLSL_TYPE_FLOAT16: + case GLSL_TYPE_UINT16: + case GLSL_TYPE_INT16: + return 2; + + case GLSL_TYPE_UINT8: + case GLSL_TYPE_INT8: + return 1; + + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_INT64: + case GLSL_TYPE_UINT64: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_SAMPLER: + return 8; + + default: + unreachable("unknown base type"); + } + + return 0; +} + #endif /* GLSL_TYPES_H */ diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h index 8687d4f1336..033b3ae739b 100644 --- a/src/compiler/nir_types.h +++ b/src/compiler/nir_types.h @@ -86,35 +86,7 @@ unsigned glsl_get_record_location_offset(const struct glsl_type *type, static inline unsigned glsl_get_bit_size(const struct glsl_type *type) { - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: /* TODO handle mediump */ - case GLSL_TYPE_SUBROUTINE: - return 32; - - case GLSL_TYPE_FLOAT16: - case GLSL_TYPE_UINT16: - case GLSL_TYPE_INT16: - return 16; - - case GLSL_TYPE_UINT8: - case GLSL_TYPE_INT8: - return 8; - - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_INT64: - case GLSL_TYPE_UINT64: - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_SAMPLER: - return 64; - - default: - unreachable("unknown base type"); - } - - return 0; + 
return glsl_base_get_byte_size(glsl_get_base_type(type)) * 8; } bool glsl_type_is_64bit(const struct glsl_type *type); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 01/19] nir: add load/store_global intrinsics
OpenCL kernels have raw pointers to global memory, so we need instructions to load/store in order to dereference these pointers. In some ways similar to other load/store intrinsics, but rather than taking an offset as a src argument, they take a raw pointer value (which can be 32b or 64b depending on the memory model). Signed-off-by: Rob Clark <robdcl...@gmail.com> Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/nir/nir_intrinsics.h | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 7b737559d5a..6597eaea87b 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -511,6 +511,8 @@ LOAD(shared, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) /* src[] = { offset }. const_index[] = { base, range } */ LOAD(push_constant, 1, 2, BASE, RANGE, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { address }. No const_index */ +LOAD(global, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) /* * Stores work the same way as loads, except now the first source is the value @@ -532,8 +534,10 @@ STORE(per_vertex_output, 3, 3, BASE, WRMASK, COMPONENT, 0) STORE(ssbo, 3, 1, WRMASK, xx, xx, 0) /* src[] = { value, offset }. const_index[] = { base, write_mask } */ STORE(shared, 2, 2, BASE, WRMASK, xx, 0) +/* src[] = { value, address }. const_index[] = { write_mask } */ +STORE(global, 2, 1, WRMASK, xx, xx, 0) -LAST_INTRINSIC(store_shared) +LAST_INTRINSIC(store_global) #undef DEFINE_SYSTEM_VALUE #undef INTRINSIC -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 05/19] RFC glsl: add cl_size and cl_alignment
v2: fix cl_size for arrays_of_arrays Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/compiler/glsl_types.cpp | 48 + src/compiler/glsl_types.h | 10 ++ src/compiler/nir_types.cpp | 12 src/compiler/nir_types.h| 4 4 files changed, 74 insertions(+) diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index 12a8a546938..2bf44c6fc30 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -2360,3 +2360,51 @@ decode_type_from_blob(struct blob_reader *blob) return NULL; } } + +unsigned +glsl_type::cl_alignment() const +{ + /* vectors unlike arrays are aligned to their size */ + if (this->is_scalar() || this->is_vector()) + return this->cl_size(); + else if (this->is_array()) + return this->without_array()->cl_alignment(); + else if (this->is_record()) { + /* Packed Structs are 0x1 aligned despite their size. */ + if (this->packed) + return 1; + + unsigned res = 1; + for (unsigned i = 0; i < this->length; ++i) { + struct glsl_struct_field = this->fields.structure[i]; + res = MAX2(res, field.type->cl_alignment()); + } + return res; + } + return 1; +} + +unsigned +glsl_type::cl_size() const +{ + if (this->is_scalar()) { + return glsl_base_get_byte_size(this->base_type); + } else if (this->is_vector()) { + unsigned vec_elemns = this->vector_elements == 3 ? 
4 : this->vector_elements; + return vec_elemns * glsl_base_get_byte_size(this->base_type); + } else if (this->is_array()) { + unsigned size = this->without_array()->cl_size(); + return size * this->arrays_of_arrays_size(); + } else if (this->is_record()) { + unsigned size = 0; + for (unsigned i = 0; i < this->length; ++i) { + struct glsl_struct_field = this->fields.structure[i]; + /* if a struct is packed, members don't get aligned */ + if (!this->packed) +size = align(size, field.type->cl_alignment()); + size += field.type->cl_size(); + } + return size; + } + return 1; +} diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h index 2e63261090e..6fb5d3c7881 100644 --- a/src/compiler/glsl_types.h +++ b/src/compiler/glsl_types.h @@ -408,6 +408,16 @@ public: */ unsigned std430_size(bool row_major) const; + /** +* Alignment in bytes of the start of this type in OpenCL memory. +*/ + unsigned cl_alignment() const; + + /** +* Size in bytes of this type in OpenCL memory +*/ + unsigned cl_size() const; + /** * \brief Can this type be implicitly converted to another? 
* diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index 76a9cf2fde7..ce4ace82c1c 100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@ -463,3 +463,15 @@ glsl_channel_type(const glsl_type *t) unreachable("Unhandled base type glsl_channel_type()"); } } + +unsigned +glsl_get_cl_size(const struct glsl_type *type) +{ + return type->cl_size(); +} + +unsigned +glsl_get_cl_alignment(const struct glsl_type *type) +{ + return type->cl_alignment(); +} diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h index 033b3ae739b..47239a6b7ce 100644 --- a/src/compiler/nir_types.h +++ b/src/compiler/nir_types.h @@ -83,6 +83,10 @@ enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type); unsigned glsl_get_record_location_offset(const struct glsl_type *type, unsigned length); +unsigned glsl_get_cl_size(const struct glsl_type *type); + +unsigned glsl_get_cl_alignment(const struct glsl_type *type); + static inline unsigned glsl_get_bit_size(const struct glsl_type *type) { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 00/19] nir/vtn/compiler: first batch of compute support
second series here: https://lists.freedesktop.org/archives/mesa-dev/2018-March/188218.html The main difference from the last series is that I tried to focus on the real core parts we need to get basic OpenCL support in spirv_to_nir, so that we can run more or less complex examples. There are some important core NIR changes and somebody should take a closer look at those. Karol Herbst (12): nir: add load/store_global intrinsics vtn: handle SpvExecutionModelKernel glsl: add packed for struct types glsl: add glsl_base_get_byte_size RFC glsl: add cl_size and cl_alignment RFC: nir/vtn: handle constant builtins from kernels nir/vtn: pointers can point to cross_workgroup or local memory as well nir: specify bit_size when loading system values nir/vtn/opencl: support fma nir: add load_kernel_param RFC nir/lower_io: lower kernel entry param load_vars to load_kernel_param RFC: nir/vtn: member in struct deref Rob Clark (7): RFC: nir/vtn: "raw" pointer support nir/vtn: print extension name in fail msg nir/vtn: import OpenCL.std.h nir/vtn: initial OpenCL.std extension nir/vtn: Handle OpInBoundsPtrAccessChain nir: use load_local_group_size nir: kernel entrypoints can have arguments src/compiler/glsl_types.cpp | 65 - src/compiler/glsl_types.h| 56 +++- src/compiler/nir/meson.build | 1 + src/compiler/nir/nir.h | 1 - src/compiler/nir/nir_builder.h | 10 +- src/compiler/nir/nir_intrinsics.h| 8 +- src/compiler/nir/nir_lower_alpha_test.c | 2 +- src/compiler/nir/nir_lower_clip.c| 3 +- src/compiler/nir/nir_lower_io.c | 39 ++- src/compiler/nir/nir_lower_subgroups.c | 8 +- src/compiler/nir/nir_lower_system_values.c | 48 ++-- src/compiler/nir/nir_lower_two_sided_color.c | 2 +- src/compiler/nir/nir_lower_wpos_center.c | 2 +- src/compiler/nir/nir_opcodes.py | 3 +- src/compiler/nir_types.cpp | 17 +- src/compiler/nir_types.h | 37 +-- src/compiler/spirv/OpenCL.std.h | 211 +++ src/compiler/spirv/spirv_to_nir.c| 106 ++-- src/compiler/spirv/vtn_opencl.c | 268 +++ src/compiler/spirv/vtn_private.h | 35 ++- 
src/compiler/spirv/vtn_subgroup.c| 2 +- src/compiler/spirv/vtn_variables.c | 313 +++ src/gallium/auxiliary/nir/tgsi_to_nir.c | 3 +- src/intel/blorp/blorp_blit.c | 2 +- src/intel/blorp/blorp_clear.c| 2 +- src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 6 +- src/mesa/drivers/dri/i965/brw_tcs.c | 2 +- 27 files changed, 1099 insertions(+), 153 deletions(-) create mode 100644 src/compiler/spirv/OpenCL.std.h create mode 100644 src/compiler/spirv/vtn_opencl.c -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 28/34] nvir/nir: implement geometry shader nir_intrinsics
v4: use smarter getIndirect helper use new getSlotAddress helper use loadFrom helper Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 27 ++ 1 file changed, 27 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index e73f4ae1e36..46b2f3e5770 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -412,6 +412,10 @@ operation Converter::getOperation(nir_intrinsic_op op) { switch (op) { + case nir_intrinsic_emit_vertex: + return OP_EMIT; + case nir_intrinsic_end_primitive: + return OP_RESTART; default: ERROR("couldn't get operation for nir_intrinsic_op %u\n", op); assert(false); @@ -1864,6 +1868,29 @@ Converter::visit(nir_intrinsic_instr *insn) ->subOp = NV50_IR_SUBOP_SHFL_IDX; break; } + case nir_intrinsic_load_per_vertex_input: { + const DataType dType = getDType(insn); + LValues = convert(>dest); + Value *indirectVertex; + Value *indirectOffset; + auto baseVertex = getIndirect(>src[0], 0, ); + auto idx = getIndirect(insn, 1, 0, ); + + Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS), + mkImm(baseVertex), indirectVertex); + for (auto i = 0u; i < insn->num_components; ++i) { + uint32_t address = getSlotAddress(insn, idx, i); + loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0, + indirectOffset, vtxBase, info->in[idx].patch); + } + break; + } + case nir_intrinsic_emit_vertex: + case nir_intrinsic_end_primitive: { + auto idx = nir_intrinsic_stream_id(insn); + mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 29/34] nvir/nir: implement nir_intrinsic_load_ubo
v4: use loadFrom helper Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 14 ++ 1 file changed, 14 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 46b2f3e5770..c415fa71738 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1891,6 +1891,20 @@ Converter::visit(nir_intrinsic_instr *insn) mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; break; } + case nir_intrinsic_load_ubo: { + const DataType dType = getDType(insn); + LValues = convert(>dest); + Value *indirectIndex; + Value *indirectOffset; + uint32_t index = getIndirect(>src[0], 0, ) + 1; + uint32_t offset = getIndirect(>src[1], 0, ); + + for (auto i = 0u; i < insn->num_components; ++i) { + loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i, + indirectOffset, indirectIndex); + } + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 31/34] nvir/nir: implement images
v3: fix compiler warnings v4: use loadFrom helper v5: fix signed min/max v6: set tex mask add support for indirect image access set cache mode Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 395 +++-- 1 file changed, 375 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index e15f1734cc3..5c3fde32601 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -91,6 +91,8 @@ private: LValues& convert(nir_register *); LValues& convert(nir_ssa_def *); + ImgFormat convertGLImgFormat(GLuint); + Value* getSrc(nir_alu_src *, uint8_t component = 0); Value* getSrc(nir_register *, uint8_t); Value* getSrc(nir_src *, uint8_t, bool indirect = false); @@ -120,6 +122,7 @@ private: DataType getDType(nir_alu_instr*); DataType getDType(nir_intrinsic_instr*); + DataType getDType(nir_intrinsic_instr*, bool isSigned); DataType getDType(nir_op, NirSSADefBitSize); std::vector getSTypes(nir_alu_instr*); @@ -153,6 +156,11 @@ private: /* tex stuff */ Value* applyProjection(Value *src, Value *proj); + unsigned int getNIRArgCount(TexInstruction::Target&); + + /* image stuff */ + uint16_t derefImageVar(nir_deref_var *, Value **indirect); + CacheMode getCacheModeFromVar(nir_variable *); nir_shader *nir; @@ -244,11 +252,30 @@ Converter::getDType(nir_alu_instr *insn) DataType Converter::getDType(nir_intrinsic_instr *insn) +{ + bool isSigned; + switch (insn->intrinsic) { + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_imin: + isSigned = true; + break; + default: + isSigned = false; + break; + } + + return getDType(insn, isSigned); +} + +DataType +Converter::getDType(nir_intrinsic_instr *insn, bool isSigned) { if (insn->dest.is_ssa) - return 
typeOfSize(insn->dest.ssa.bit_size / 8, false, false); + return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned); else - return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false); + return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned); } DataType @@ -445,28 +472,31 @@ Converter::getSubOp(nir_op op) } } +#define CASE_OP_INTR_ATOM(nir, nvir) \ + case nir_intrinsic_image_atomic_ ## nir : \ + case nir_intrinsic_shared_atomic_ ## nir : \ + case nir_intrinsic_ssbo_atomic_ ## nir : \ + return NV50_IR_SUBOP_ATOM_ ## nvir +#define CASE_OP_INTR_ATOM_S(nir, nvir) \ + case nir_intrinsic_shared_atomic_ ## nir : \ + case nir_intrinsic_ssbo_atomic_ ## nir : \ + return NV50_IR_SUBOP_ATOM_ ## nvir int Converter::getSubOp(nir_intrinsic_op op) { switch (op) { - case nir_intrinsic_ssbo_atomic_add: - return NV50_IR_SUBOP_ATOM_ADD; - case nir_intrinsic_ssbo_atomic_and: - return NV50_IR_SUBOP_ATOM_AND; - case nir_intrinsic_ssbo_atomic_comp_swap: - return NV50_IR_SUBOP_ATOM_CAS; - case nir_intrinsic_ssbo_atomic_exchange: - return NV50_IR_SUBOP_ATOM_EXCH; - case nir_intrinsic_ssbo_atomic_or: - return NV50_IR_SUBOP_ATOM_OR; - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - return NV50_IR_SUBOP_ATOM_MAX; - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - return NV50_IR_SUBOP_ATOM_MIN; - case nir_intrinsic_ssbo_atomic_xor: - return NV50_IR_SUBOP_ATOM_XOR; + CASE_OP_INTR_ATOM(add, ADD); + CASE_OP_INTR_ATOM(and, AND); + CASE_OP_INTR_ATOM(comp_swap, CAS); + CASE_OP_INTR_ATOM(exchange, EXCH); + CASE_OP_INTR_ATOM(or, OR); + case nir_intrinsic_image_atomic_max: + CASE_OP_INTR_ATOM_S(imax, MAX); + CASE_OP_INTR_ATOM_S(umax, MAX); + case nir_intrinsic_image_atomic_min: + CASE_OP_INTR_ATOM_S(imin, MIN); + CASE_OP_INTR_ATOM_S(umin, MIN); + CASE_OP_INTR_ATOM(xor, XOR); case nir_intrinsic_vote_all: return NV50_IR_SUBOP_VOTE_ALL; case nir_intrinsic_vote_any: @@ -479,6 +509,8 @@ Converter::getSubOp(nir_intrinsic_op 
op) return 0; } } +#undef CASE_OP_INTR_ATOM +#undef CASE_OP_INTR_ATOM_S CondCode Converter::getCondCode(nir_op op) @@ -1628,6 +1660,68 @@ Converter::convert(nir_intrinsic_op intr) } } +ImgFormat +Converter::convertGLImgFormat(GLuint format) +{ +#define FMT_CASE(a, b) \ + case GL_ ## a: return nv50_ir::FMT_ ## b + + switch (format) { + FMT_CASE(NONE, NONE); + + FMT_CASE(RGBA32F, RGBA32F); + FMT_CASE(RGBA16F, RGBA16F); + FMT_CASE(RG32F, RG32F); + FMT_CASE(RG16F, RG16F); + FMT_CASE(R11F_G11F_B10F, R11G11B10F); + FMT_CASE(R32F, R32F); + FMT_CASE(R16F, R16F); + + FMT_CASE(RGBA32UI, RGBA32UI); + FMT_CASE(RG
[Mesa-dev] [PATCH v6 26/34] nvir/nir: implement vote and ballot
v2: add vote_eq support use the new subop intrinsic helper add ballot v3: add read_(first_)invocation Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 42 ++ 1 file changed, 42 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index ebf6a5ceb5c..f594e299645 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -443,6 +443,12 @@ int Converter::getSubOp(nir_intrinsic_op op) { switch (op) { + case nir_intrinsic_vote_all: + return NV50_IR_SUBOP_VOTE_ALL; + case nir_intrinsic_vote_any: + return NV50_IR_SUBOP_VOTE_ANY; + case nir_intrinsic_vote_ieq: + return NV50_IR_SUBOP_VOTE_UNI; default: ERROR("couldn't get subop for nir_intrinsic_op %u\n", op); assert(false); @@ -1809,6 +1815,42 @@ Converter::visit(nir_intrinsic_instr *insn) loadImm(newDefs[0], 32u); break; } + case nir_intrinsic_vote_all: + case nir_intrinsic_vote_any: + case nir_intrinsic_vote_ieq: { + LValues = convert(>dest); + Value *pred = new_LValue(func, FILE_PREDICATE); + mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(>src[0], 0), zero); + mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op); + mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred); + break; + } + case nir_intrinsic_ballot: { + LValues = convert(>dest); + Value *pred = new_LValue(func, FILE_PREDICATE); + mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(>src[0], 0), zero); + Instruction *ballot = mkOp1(OP_VOTE, TYPE_U32, getSSA(), pred); + ballot->subOp = NV50_IR_SUBOP_VOTE_ANY; + mkOp2(OP_MERGE, TYPE_U64, newDefs[0], ballot->getDef(0), loadImm(getSSA(), 0)); + break; + } + case nir_intrinsic_read_first_invocation: + case nir_intrinsic_read_invocation: { + LValues = convert(>dest); + const DataType dType = getDType(insn); + Value *tmp = getScratch(); + + if (op == nir_intrinsic_read_first_invocation) { + 
mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY; + mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV; + mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT; + } else + tmp = getSrc(>src[1], 0); + + mkOp3(OP_SHFL, dType, newDefs[0], getSrc(>src[0], 0), tmp, mkImm(0x1f)) + ->subOp = NV50_IR_SUBOP_SHFL_IDX; + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 34/34] nvir/nir: implement intrinsic shader_clock
Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 8 1 file changed, 8 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 19086157baa..2f831bfe487 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -2329,6 +2329,14 @@ Converter::visit(nir_intrinsic_instr *insn) bar->subOp = getSubOp(op); break; } + case nir_intrinsic_shader_clock: { + const DataType dType = getDType(insn); + LValues = convert(>dest); + + loadImm(newDefs[0], 0u); + mkOp1v(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0)); + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 23/34] nvir/nir: implement nir_ssa_undef_instr
v2: use mkOp Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 13 + 1 file changed, 13 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 43a15fe0a76..6a43b764601 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -143,6 +143,7 @@ private: bool visit(nir_jump_instr *); bool visit(nir_load_const_instr*); bool visit(nir_loop *); + bool visit(nir_ssa_undef_instr *); nir_shader *nir; @@ -1451,6 +1452,8 @@ Converter::visit(nir_instr *insn) return visit(nir_instr_as_jump(insn)); case nir_instr_type_load_const: return visit(nir_instr_as_load_const(insn)); + case nir_instr_type_ssa_undef: + return visit(nir_instr_as_ssa_undef(insn)); default: ERROR("unknown nir_instr type %u\n", insn->type); return false; @@ -2111,6 +2114,16 @@ Converter::visit(nir_alu_instr *insn) } #undef DEFAULT_CHECKS +bool +Converter::visit(nir_ssa_undef_instr *insn) +{ + LValues = convert(>def); + for (auto i = 0u; i < insn->def.num_components; ++i) { + mkOp(OP_NOP, TYPE_NONE, newDefs[i]); + } + return true; +} + bool Converter::run() { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 33/34] nvir/nir: implement load_per_vertex_output
v4: use smarter getIndirect helper use new getSlotAddress helper v5: use loadFrom helper Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 23 ++ 1 file changed, 23 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index bc0127bef84..19086157baa 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -2005,6 +2005,29 @@ Converter::visit(nir_intrinsic_instr *insn) } break; } + case nir_intrinsic_load_per_vertex_output: { + const DataType dType = getDType(insn); + LValues = convert(>dest); + Value *indirectVertex; + Value *indirectOffset; + auto baseVertex = getIndirect(>src[0], 0, ); + auto idx = getIndirect(insn, 1, 0, ); + Value *vtxBase = nullptr; + + if (indirectVertex) + vtxBase = indirectVertex; + else + vtxBase = loadImm(nullptr, baseVertex); + + vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase); + + for (auto i = 0u; i < insn->num_components; ++i) { + uint32_t address = getSlotAddress(insn, idx, i); + loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0, + indirectOffset, vtxBase, info->in[idx].patch); + } + break; + } case nir_intrinsic_emit_vertex: case nir_intrinsic_end_primitive: { auto idx = nir_intrinsic_stream_id(insn); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 32/34] nvir/nir: add memory barriers
v5: add more barrier intrinsics Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 19 +++ 1 file changed, 19 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 5c3fde32601..bc0127bef84 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -489,6 +489,14 @@ Converter::getSubOp(nir_intrinsic_op op) CASE_OP_INTR_ATOM(and, AND); CASE_OP_INTR_ATOM(comp_swap, CAS); CASE_OP_INTR_ATOM(exchange, EXCH); + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier: + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + return NV50_IR_SUBOP_MEMBAR(M, GL); + case nir_intrinsic_memory_barrier_shared: + return NV50_IR_SUBOP_MEMBAR(M, CTA); CASE_OP_INTR_ATOM(or, OR); case nir_intrinsic_image_atomic_max: CASE_OP_INTR_ATOM_S(imax, MAX); @@ -2287,6 +2295,17 @@ Converter::visit(nir_intrinsic_instr *insn) bar->subOp = NV50_IR_SUBOP_BAR_SYNC; break; } + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier: + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: { + Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL); + bar->fixed = 1; + bar->subOp = getSubOp(op); + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 27/34] nvir/nir: implement variable indexing
We store those arrays in local memory and reserve some space for each of the arrays. The arrays are stored in a packed format, because we know quite easily the context of each index. We don't do that in TGSI so far. This causes various issues to come up in the MemoryOpt pass, because ld/st with indirects aren't guaranteed to be aligned to 0x10 anymore. v3: use fixed size vec4 arrays until we fix MemoryOpt v4: fix for 64 bit types v5: use loadFrom helper Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 58 ++ 1 file changed, 58 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index f594e299645..e73f4ae1e36 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -80,6 +80,7 @@ private: typedef decltype(nir_ssa_def().index) NirSSADefIdx; typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize; typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap; + typedef std::unordered_map<NirSSADefIdx, uint32_t> NirArrayLMemOffsets; typedef std::unordered_map<decltype(nir_block().index), BasicBlock*> NirBlockMap; TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow); @@ -157,6 +158,7 @@ private: NirDefMap ssaDefs; NirDefMap regDefs; + NirArrayLMemOffsets regToLmemOffset; NirBlockMap blocks; unsigned int curLoopDepth; @@ -1259,6 +1261,7 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op, bool Converter::parseNIR() { + info->bin.tlsSpace = 0; info->io.clipDistances = nir->info.clip_distance_array_size; info->io.cullDistances = nir->info.cull_distance_array_size; @@ -1348,6 +1351,16 @@ Converter::visit(nir_function *function) break; } + nir_foreach_register(reg, >impl->registers) { + if (reg->num_array_elems) { + /* TODO: packed variables would be nice, but MemoryOpt fails */ + /* replace 4 with reg->num_components 
*/ + uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8); + regToLmemOffset[reg->index] = info->bin.tlsSpace; + info->bin.tlsSpace += size; + } + } + nir_index_ssa_defs(function->impl); foreach_list_typed(nir_cf_node, node, node, >impl->body) { if (!visit(node)) @@ -2035,6 +2048,51 @@ Converter::visit(nir_alu_instr *insn) * 2. they basically just merge multiple values into one data type */ CASE_OPFI(mov): + if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) { + nir_reg_dest& reg = insn->dest.dest.reg; + auto goffset = regToLmemOffset[reg.reg->index]; + auto comps = reg.reg->num_components; + auto size = reg.reg->bit_size / 8; + auto csize = 4 * size; /* TODO after fixing MemoryOpts: comps * size; */ + auto aoffset = csize * reg.base_offset; + Value *indirect = nullptr; + + if (reg.indirect) +indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), + getSrc(reg.indirect, 0), mkImm(csize)); + + for (auto i = 0u; i < comps; ++i) { +if (!((1u << i) & insn->dest.write_mask)) + continue; + +Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size); +mkStore(OP_STORE, dType, sym, indirect, getSrc(>src[0], i)); + } + break; + } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) { + LValues = convert(>dest); + nir_reg_src& reg = insn->src[0].src.reg; + auto goffset = regToLmemOffset[reg.reg->index]; + /* auto comps = reg.reg->num_components; */ + auto size = reg.reg->bit_size / 8; + auto csize = 4 * size; /* TODO after fixing MemoryOpts: comps * size; */ + auto aoffset = csize * reg.base_offset; + Value *indirect = nullptr; + + if (reg.indirect) +indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize)); + + for (auto i = 0u; i < newDefs.size(); ++i) +loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect); + + break; + } else { + LValues = convert(>dest); + for (LValues::size_type c = 0u; c < newDefs.size(); ++c) { 
+mkMov(newDefs[c], getSrc(>src[0], c), dType); + } + } + break; case nir_op_vec2: case nir_op_vec3: case nir_op_vec4: { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 30/34] nvir/nir: implement ssbo intrinsics
v4: use loadFrom helper v5: support indirect buffer access Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 90 ++ 1 file changed, 90 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index c415fa71738..e15f1734cc3 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -449,6 +449,24 @@ int Converter::getSubOp(nir_intrinsic_op op) { switch (op) { + case nir_intrinsic_ssbo_atomic_add: + return NV50_IR_SUBOP_ATOM_ADD; + case nir_intrinsic_ssbo_atomic_and: + return NV50_IR_SUBOP_ATOM_AND; + case nir_intrinsic_ssbo_atomic_comp_swap: + return NV50_IR_SUBOP_ATOM_CAS; + case nir_intrinsic_ssbo_atomic_exchange: + return NV50_IR_SUBOP_ATOM_EXCH; + case nir_intrinsic_ssbo_atomic_or: + return NV50_IR_SUBOP_ATOM_OR; + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + return NV50_IR_SUBOP_ATOM_MAX; + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + return NV50_IR_SUBOP_ATOM_MIN; + case nir_intrinsic_ssbo_atomic_xor: + return NV50_IR_SUBOP_ATOM_XOR; case nir_intrinsic_vote_all: return NV50_IR_SUBOP_VOTE_ALL; case nir_intrinsic_vote_any: @@ -1905,6 +1923,78 @@ Converter::visit(nir_intrinsic_instr *insn) } break; } + case nir_intrinsic_get_buffer_size: { + LValues = convert(>dest); + const DataType dType = getDType(insn); + Value *indirectBuffer; + uint32_t buffer = getIndirect(>src[0], 0, ); + + Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0); + mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer); + break; + } + case nir_intrinsic_store_ssbo: { + DataType sType = getSType(insn->src[0], false, false); + Value *indirectBuffer; + Value *indirectOffset; + uint32_t buffer = getIndirect(>src[1], 0, ); + uint32_t offset = getIndirect(>src[2], 0, ); + + for (auto i = 0u; i 
< insn->num_components; ++i) { + if (!((1u << i) & nir_intrinsic_write_mask(insn))) +continue; + Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType, +offset + i * typeSizeof(sType)); + mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(>src[0], i)) +->setIndirect(0, 1, indirectBuffer); + } + info->io.globalAccess |= 0x2; + break; + } + case nir_intrinsic_load_ssbo: { + const DataType dType = getDType(insn); + LValues = convert(>dest); + Value *indirectBuffer; + Value *indirectOffset; + uint32_t buffer = getIndirect(>src[0], 0, ); + uint32_t offset = getIndirect(>src[1], 0, ); + + for (auto i = 0u; i < insn->num_components; ++i) + loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i, + indirectOffset, indirectBuffer); + + info->io.globalAccess |= 0x1; + break; + } + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_xor: { + const DataType dType = getDType(insn); + LValues = convert(>dest); + Value *indirectBuffer; + Value *indirectOffset; + uint32_t buffer = getIndirect(>src[0], 0, ); + uint32_t offset = getIndirect(>src[1], 0, ); + + Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset); + Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, +getSrc(>src[2], 0)); + if (op == nir_intrinsic_ssbo_atomic_comp_swap) + atom->setSrc(2, getSrc(>src[3], 0)); + atom->setIndirect(0, 0, indirectOffset); + atom->setIndirect(0, 1, indirectBuffer); + atom->subOp = getSubOp(op); + + info->io.globalAccess |= 0x2; + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 22/34] nvir/nir: implement loading system values
v2: support more sys values fixed a bug where for multi component reads all values ended up in x v3: add load_patch_vertices_in v4: add subgroup stuff v5: add helper invocation v6: fix loading 64 bit system values Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 122 + 1 file changed, 122 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 2bd40e00db9..43a15fe0a76 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -85,6 +85,7 @@ private: LValues& convert(nir_alu_dest *); BasicBlock* convert(nir_block *); LValues& convert(nir_dest *); + SVSemantic convert(nir_intrinsic_op); LValues& convert(nir_register *); LValues& convert(nir_ssa_def *); @@ -1457,6 +1458,70 @@ Converter::visit(nir_instr *insn) return true; } +SVSemantic +Converter::convert(nir_intrinsic_op intr) +{ + switch (intr) { + case nir_intrinsic_load_base_vertex: + return SV_BASEVERTEX; + case nir_intrinsic_load_base_instance: + return SV_BASEINSTANCE; + case nir_intrinsic_load_draw_id: + return SV_DRAWID; + case nir_intrinsic_load_front_face: + return SV_FACE; + case nir_intrinsic_load_helper_invocation: + return SV_THREAD_KILL; + case nir_intrinsic_load_instance_id: + return SV_INSTANCE_ID; + case nir_intrinsic_load_invocation_id: + return SV_INVOCATION_ID; + case nir_intrinsic_load_local_group_size: + return SV_NTID; + case nir_intrinsic_load_local_invocation_id: + return SV_TID; + case nir_intrinsic_load_num_work_groups: + return SV_NCTAID; + case nir_intrinsic_load_patch_vertices_in: + return SV_VERTEX_COUNT; + case nir_intrinsic_load_primitive_id: + return SV_PRIMITIVE_ID; + case nir_intrinsic_load_sample_id: + return SV_SAMPLE_INDEX; + case nir_intrinsic_load_sample_mask_in: + return SV_SAMPLE_MASK; + case nir_intrinsic_load_sample_pos: + return SV_SAMPLE_POS; + case 
nir_intrinsic_load_subgroup_eq_mask: + return SV_LANEMASK_EQ; + case nir_intrinsic_load_subgroup_ge_mask: + return SV_LANEMASK_GE; + case nir_intrinsic_load_subgroup_gt_mask: + return SV_LANEMASK_GT; + case nir_intrinsic_load_subgroup_le_mask: + return SV_LANEMASK_LE; + case nir_intrinsic_load_subgroup_lt_mask: + return SV_LANEMASK_LT; + case nir_intrinsic_load_subgroup_invocation: + return SV_LANEID; + case nir_intrinsic_load_tess_coord: + return SV_TESS_COORD; + case nir_intrinsic_load_tess_level_inner: + return SV_TESS_INNER; + case nir_intrinsic_load_tess_level_outer: + return SV_TESS_OUTER; + case nir_intrinsic_load_vertex_id: + return SV_VERTEX_ID; + case nir_intrinsic_load_work_group_id: + return SV_CTAID; + default: + ERROR("unknown SVSemantic for nir_intrinsic_op %s\n", +nir_intrinsic_infos[intr].name); + assert(false); + return SV_LAST; + } +} + bool Converter::visit(nir_intrinsic_instr *insn) { @@ -1622,6 +1687,63 @@ Converter::visit(nir_intrinsic_instr *insn) mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred); break; } + case nir_intrinsic_load_base_vertex: + case nir_intrinsic_load_base_instance: + case nir_intrinsic_load_draw_id: + case nir_intrinsic_load_front_face: + case nir_intrinsic_load_helper_invocation: + case nir_intrinsic_load_instance_id: + case nir_intrinsic_load_invocation_id: + case nir_intrinsic_load_local_group_size: + case nir_intrinsic_load_local_invocation_id: + case nir_intrinsic_load_num_work_groups: + case nir_intrinsic_load_patch_vertices_in: + case nir_intrinsic_load_primitive_id: + case nir_intrinsic_load_sample_id: + case nir_intrinsic_load_sample_mask_in: + case nir_intrinsic_load_sample_pos: + case nir_intrinsic_load_subgroup_eq_mask: + case nir_intrinsic_load_subgroup_ge_mask: + case nir_intrinsic_load_subgroup_gt_mask: + case nir_intrinsic_load_subgroup_le_mask: + case nir_intrinsic_load_subgroup_lt_mask: + case nir_intrinsic_load_subgroup_invocation: + case nir_intrinsic_load_tess_coord: + case 
nir_intrinsic_load_tess_level_inner: + case nir_intrinsic_load_tess_level_outer: + case nir_intrinsic_load_vertex_id: + case nir_intrinsic_load_work_group_id: { + const DataType dType = getDType(insn); + SVSemantic sv = convert(op); + LValues = convert(>dest); + + for (auto i = 0u; i < insn->num_components; ++i) { + Value *def; + if (typeSizeof(dType) == 8) +def = getSSA(); + else +def = newDefs[i]; + + if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) { +loadImm(def, 0u); + } else { +Symbol
[Mesa-dev] [PATCH v6 16/34] nvir/nir: add skeleton for nir_intrinsic_instr
Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp| 17 + 1 file changed, 17 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 2a9c0929e90..3ba0285b411 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -105,6 +105,7 @@ private: bool visit(nir_function *); bool visit(nir_if *); bool visit(nir_instr *); + bool visit(nir_intrinsic_instr *); bool visit(nir_jump_instr *); bool visit(nir_load_const_instr*); bool visit(nir_loop *); @@ -1265,6 +1266,8 @@ bool Converter::visit(nir_instr *insn) { switch (insn->type) { + case nir_instr_type_intrinsic: + return visit(nir_instr_as_intrinsic(insn)); case nir_instr_type_jump: return visit(nir_instr_as_jump(insn)); case nir_instr_type_load_const: @@ -1276,6 +1279,20 @@ Converter::visit(nir_instr *insn) return true; } +bool +Converter::visit(nir_intrinsic_instr *insn) +{ + nir_intrinsic_op op = insn->intrinsic; + + switch (op) { + default: + ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); + return false; + } + + return true; +} + bool Converter::visit(nir_jump_instr *insn) { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 20/34] nvir/nir: implement nir_intrinsic_load_(interpolated_)input
v3: and load_output v4: use smarter getIndirect helper use new getSlotAddress helper v5: don't use const_offset directly fix for indirects v6: add support for interpolateAt Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 97 ++ 1 file changed, 97 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 546a73f7a74..04a0f03ae2f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1511,6 +1511,103 @@ Converter::visit(nir_intrinsic_instr *insn) } break; } + case nir_intrinsic_load_input: + case nir_intrinsic_load_interpolated_input: + case nir_intrinsic_load_output: { + const DataType dType = getDType(insn); + Value *indirect; + bool input = op != nir_intrinsic_load_output; + operation nvirOp; + uint32_t mode; + + LValues = convert(>dest); + auto idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, ); + nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx]; + + /* see load_barycentric_* handling */ + if (prog->getType() == Program::TYPE_FRAGMENT) { + mode = translateInterpMode(, nvirOp); + if (op == nir_intrinsic_load_interpolated_input) { +ImmediateValue immMode; +if (getSrc(>src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode)) + mode |= immMode.reg.data.u32; + } + } + + for (auto i = 0u; i < insn->num_components; ++i) { + uint32_t address = getSlotAddress(insn, idx, i); + Symbol *sym = mkSymbol(input ? 
FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address); + if (prog->getType() == Program::TYPE_FRAGMENT) { +int s = 1; +if (typeSizeof(dType) == 8) { + Value *lo = getSSA(); + Value *hi = getSSA(); + Instruction *interp; + + interp = mkOp1(nvirOp, TYPE_U32, lo, sym); + if (nvirOp == OP_PINTERP) + interp->setSrc(s, fp.position); + if (mode & NV50_IR_INTERP_OFFSET) + interp->setSrc(s, getSrc(>src[0], 0)); + interp->setInterpolate(mode); + interp->setIndirect(0, 0, indirect); + + Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4); + interp = mkOp1(nvirOp, TYPE_U32, hi, sym1); + if (nvirOp == OP_PINTERP) + interp->setSrc(s++, fp.position); + if (mode & NV50_IR_INTERP_OFFSET) + interp->setSrc(s++, getSrc(>src[0], 0)); + interp->setInterpolate(mode); + interp->setIndirect(0, 0, indirect); + + mkOp2(OP_MERGE, dType, newDefs[i], lo, hi); +} else { + Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym); + if (nvirOp == OP_PINTERP) + interp->setSrc(s++, fp.position); + if (mode & NV50_IR_INTERP_OFFSET) + interp->setSrc(s++, getSrc(>src[0], 0)); + interp->setInterpolate(mode); + interp->setIndirect(0, 0, indirect); +} + } else { +mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch; + } + } + break; + } + case nir_intrinsic_load_barycentric_at_offset: + case nir_intrinsic_load_barycentric_at_sample: + case nir_intrinsic_load_barycentric_centroid: + case nir_intrinsic_load_barycentric_pixel: { + LValues = convert(>dest); + uint32_t mode; + + if (op == nir_intrinsic_load_barycentric_centroid) { + mode = NV50_IR_INTERP_CENTROID; + } else if (op == nir_intrinsic_load_barycentric_at_offset) { + Value *offs[2]; + for (auto c = 0u; c < 2; c++) { +offs[c] = getScratch(); +mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(>src[0], c), loadImm(NULL, 0.4375f)); +mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f)); +mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f)); +mkCvt(OP_CVT, TYPE_S32, offs[c], 
TYPE_F32, offs[c]); + } + mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]); + + mode = NV50_IR_INTERP_OFFSET; + } else if (op == nir_intrinsic_load_barycentric_pixel) { + mode = NV50_IR_INTERP_DEFAULT; + } else if (op == nir_intrinsic_load_barycentric_at_sample) { + mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(>src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET; + mode = NV50_IR_INTERP_OFFSET; +
[Mesa-dev] [PATCH v6 15/34] nvir/nir: implement nir_load_const_instr
Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 18 ++ 1 file changed, 18 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index a10038f9a88..2a9c0929e90 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -106,6 +106,7 @@ private: bool visit(nir_if *); bool visit(nir_instr *); bool visit(nir_jump_instr *); + bool visit(nir_load_const_instr*); bool visit(nir_loop *); nir_shader *nir; @@ -1266,6 +1267,8 @@ Converter::visit(nir_instr *insn) switch (insn->type) { case nir_instr_type_jump: return visit(nir_instr_as_jump(insn)); + case nir_instr_type_load_const: + return visit(nir_instr_as_load_const(insn)); default: ERROR("unknown nir_instr type %u\n", insn->type); return false; @@ -1300,6 +1303,21 @@ Converter::visit(nir_jump_instr *insn) return true; } +bool +Converter::visit(nir_load_const_instr *insn) +{ + assert(insn->def.bit_size <= 64); + + LValues = convert(>def); + for (int i = 0; i < insn->def.num_components; i++) { + if (insn->def.bit_size > 32) + loadImm(newDefs[i], insn->value.u64[i]); + else + loadImm(newDefs[i], insn->value.u32[i]); + } + return true; +} + bool Converter::run() { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 18/34] nvir/nir: implement nir_intrinsic_load_uniform
v2: use new getIndirect helper fixes symbols for 64 bit types v4: use smarter getIndirect helper simplify address calculation use loadFrom helper Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 3a81a3ca32e..797b3e7c9d5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1455,6 +1455,16 @@ Converter::visit(nir_intrinsic_instr *insn) nir_intrinsic_op op = insn->intrinsic; switch (op) { + case nir_intrinsic_load_uniform: { + LValues = convert(>dest); + const DataType dType = getDType(insn); + Value *indirect; + auto coffset = getIndirect(insn, 0, 0, ); + for (auto i = 0; i < insn->num_components; ++i) { + loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect); + } + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 12/34] nvir/nir: add loadFrom and storeTo helper
Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 72 ++ 1 file changed, 72 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 8d547dbbea4..d4432684b27 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -76,6 +76,13 @@ private: bool centroid, unsigned semantics); + Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base, + uint8_t c, Value *indirect0 = nullptr, + Value *indirect1 = nullptr, bool patch = false); + void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType, +Value *src, uint8_t idx, uint8_t c, Value *indirect0 = nullptr, +Value *indirect1 = nullptr); + bool isFloatType(nir_alu_type); bool isSignedType(nir_alu_type); bool isResultFloat(nir_op); @@ -912,6 +919,71 @@ Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot) return vary[idx].slot[slot] * 4; } +Instruction * +Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def, +uint32_t base, uint8_t c, Value *indirect0, +Value *indirect1, bool patch) +{ + auto tySize = typeSizeof(ty); + + if (tySize == 8 && + (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) { + Value *lo = getSSA(); + Value *hi = getSSA(); + + Instruction *loi = + mkLoad(TYPE_U32, lo, +mkSymbol(file, i, TYPE_U32, base + c * tySize), +indirect0); + loi->setIndirect(0, 1, indirect1); + loi->perPatch = patch; + + Instruction *hii = + mkLoad(TYPE_U32, hi, +mkSymbol(file, i, TYPE_U32, base + c * tySize + 4), +indirect0); + hii->setIndirect(0, 1, indirect1); + hii->perPatch = patch; + + return mkOp2(OP_MERGE, ty, def, lo, hi); + } else { + Instruction *ld = + mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0); + ld->setIndirect(0, 1, indirect1); + ld->perPatch = patch; + return ld; + } +} + +void 
+Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op, + DataType ty, Value *src, uint8_t idx, uint8_t c, + Value *indirect0, Value *indirect1) +{ + uint8_t size = typeSizeof(ty); + uint32_t address = getSlotAddress(insn, idx, c); + + if (size == 8 && indirect0) { + Value *split[2]; + mkSplit(split, 4, src); + + if (op == OP_EXPORT) { + split[0] = mkMov(getSSA(), split[0], ty)->getDef(0); + split[1] = mkMov(getSSA(), split[1], ty)->getDef(0); + } + + mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0, + split[0])->perPatch = info->out[idx].patch; + mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0, + split[1])->perPatch = info->out[idx].patch; + } else { + if (op == OP_EXPORT) + src = mkMov(getSSA(size), src, ty)->getDef(0); + mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0, + src)->perPatch = info->out[idx].patch; + } +} + bool Converter::run() { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 14/34] nvir/nir: implement CFG handling
v6: fix loops with blocks at the end nothing points to skip blocks with no instructions and no predecessors Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 270 - 1 file changed, 268 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 538c85b6a69..a10038f9a88 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -55,8 +55,10 @@ private: typedef decltype(nir_ssa_def().index) NirSSADefIdx; typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize; typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap; + typedef std::unordered_map<decltype(nir_block().index), BasicBlock*> NirBlockMap; LValues& convert(nir_alu_dest *); + BasicBlock* convert(nir_block *); LValues& convert(nir_dest *); LValues& convert(nir_register *); LValues& convert(nir_ssa_def *); @@ -98,15 +100,46 @@ private: bool assignSlots(); bool parseNIR(); + bool visit(nir_block *); + bool visit(nir_cf_node *); + bool visit(nir_function *); + bool visit(nir_if *); + bool visit(nir_instr *); + bool visit(nir_jump_instr *); + bool visit(nir_loop *); + nir_shader *nir; NirDefMap ssaDefs; NirDefMap regDefs; + NirBlockMap blocks; + unsigned int curLoopDepth; + + BasicBlock *exit; + + union { + struct { + Value *position; + } fp; + }; }; Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) : ConverterCommon(prog, info), - nir(nir) {} + nir(nir), + curLoopDepth(0) {} + +BasicBlock * +Converter::convert(nir_block *block) +{ + NirBlockMap::iterator it = blocks.find(block->index); + if (it != blocks.end()) + return (*it).second; + + BasicBlock *bb = new BasicBlock(func); + blocks[block->index] = bb; + return bb; +} bool Converter::isFloatType(nir_alu_type type) @@ -1039,6 +1072,234 @@ Converter::parseNIR() return true; } +bool 
+Converter::visit(nir_function *function) +{ + /* we only support emiting the main function for now */ + assert(!strcmp(function->name, "main")); + assert(function->impl); + + /* usually the blocks will set everything up, but main is special */ + BasicBlock *entry = new BasicBlock(prog->main); + exit = new BasicBlock(prog->main); + blocks[nir_start_block(function->impl)->index] = entry; + prog->main->setEntry(entry); + prog->main->setExit(exit); + + setPosition(entry, true); + + switch (prog->getType()) { + case Program::TYPE_TESSELLATION_CONTROL: + outBase = mkOp2v( + OP_SUB, TYPE_U32, getSSA(), + mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)), + mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0))); + break; + case Program::TYPE_FRAGMENT: { + Symbol *sv = mkSysVal(SV_POSITION, 3); + fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv); + fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]); + break; + } + default: + break; + } + + nir_index_ssa_defs(function->impl); + foreach_list_typed(nir_cf_node, node, node, >impl->body) { + if (!visit(node)) + return false; + } + + bb->cfg.attach(>cfg, Graph::Edge::TREE); + setPosition(exit, true); + + /* TODO: for non main function this needs to be a OP_RETURN */ + mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; + return true; +} + +bool +Converter::visit(nir_cf_node *node) +{ + switch (node->type) { + case nir_cf_node_block: + if (!visit(nir_cf_node_as_block(node))) + return false; + break; + case nir_cf_node_if: + if (!visit(nir_cf_node_as_if(node))) + return false; + break; + case nir_cf_node_loop: + if (!visit(nir_cf_node_as_loop(node))) + return false; + break; + default: + ERROR("unknown nir_cf_node type %u\n", node->type); + return false; + } + return true; +} + +bool +Converter::visit(nir_block *block) +{ + if (!block->predecessors->entries && block->instr_list.is_empty()) + return true; + + BasicBlock *bb = convert(block); + + setPosition(bb, true); + 
nir_foreach_instr(insn, block) { + if (!visit(insn)) + return false; + } + return true; +} + +bool +Converter::visit(nir_if *nif) +{ + DataType sType = getSType(nif->condition, false, false); + Value *src = getSrc(>condition, 0); + + nir_block *lastThen = nir_if_last_then_block(nif); + nir_block *lastElse = nir_if_last_else_block(nif); + + assert(!lastThen->successors[1]); + assert(!lastElse->successors[1]); + + BasicBlock *ifBB = convert(nir_if_first_then_block(ni
[Mesa-dev] [PATCH v6 21/34] nvir/nir: implement intrinsic_discard(_if)
Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 14 ++ 1 file changed, 14 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 04a0f03ae2f..2bd40e00db9 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1608,6 +1608,20 @@ Converter::visit(nir_intrinsic_instr *insn) loadImm(newDefs[1], mode); break; } + case nir_intrinsic_discard: + mkOp(OP_DISCARD, TYPE_NONE, NULL); + break; + case nir_intrinsic_discard_if: { + Value *pred = new_LValue(func, FILE_PREDICATE); + if (insn->num_components > 1) { + ERROR("nir_intrinsic_discard_if only with 1 component supported!\n"); + assert(false); + return false; + } + mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(>src[0], 0), zero); + mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred); + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 19/34] nvir/nir: implement nir_intrinsic_store_(per_vertex_)output
v3: add workaround for RA issues indirects have to be multiplied by 0x10 fix indirect access v4: use smarter getIndirect helper use storeTo helper v5: don't use const_offset directly Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 46 ++ 1 file changed, 46 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 797b3e7c9d5..546a73f7a74 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1258,6 +1258,11 @@ Converter::visit(nir_function *function) setPosition(entry, true); + if (info->io.genUserClip > 0) { + for (int c = 0; c < 4; ++c) + clipVtx[c] = getScratch(); + } + switch (prog->getType()) { case Program::TYPE_TESSELLATION_CONTROL: outBase = mkOp2v( @@ -1284,6 +1289,9 @@ Converter::visit(nir_function *function) bb->cfg.attach(>cfg, Graph::Edge::TREE); setPosition(exit, true); + if (info->io.genUserClip > 0) + handleUserClipPlanes(); + /* TODO: for non main function this needs to be a OP_RETURN */ mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; return true; @@ -1465,6 +1473,44 @@ Converter::visit(nir_intrinsic_instr *insn) } break; } + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_vertex_output: { + Value *indirect; + DataType dType = getSType(insn->src[0], false, false); + auto idx = getIndirect(insn, op == nir_intrinsic_store_output ? 
1 : 2, 0, ); + + for (auto i = 0u; i < insn->num_components; ++i) { + if (!((1u << i) & nir_intrinsic_write_mask(insn))) +continue; + + uint8_t offset = 0; + Value *src = getSrc(>src[0], i); + switch (prog->getType()) { + case Program::TYPE_FRAGMENT: { +if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) { + /* TGSI uses a different interface than NIR, TGSI stores that +* value in the z component, NIR in X +*/ + offset += 2; + src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src); +} +break; + } + case Program::TYPE_VERTEX: { +if (info->io.genUserClip > 0) { + mkMov(clipVtx[i], src); + src = clipVtx[i]; +} +break; + } + default: +break; + } + + storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect); + } + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 25/34] nvir/nir: add getOperation for intrinsics
Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 24 ++ 1 file changed, 24 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 008beb9a02a..ebf6a5ceb5c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -124,10 +124,12 @@ private: std::vector getSTypes(nir_alu_instr*); DataType getSType(nir_src&, bool isFloat, bool isSigned); + operation getOperation(nir_intrinsic_op); operation getOperation(nir_op); operation getOperation(nir_texop); operation preOperationNeeded(nir_op); + int getSubOp(nir_intrinsic_op); int getSubOp(nir_op); CondCode getCondCode(nir_op); @@ -404,6 +406,17 @@ Converter::getOperation(nir_texop op) } } +operation +Converter::getOperation(nir_intrinsic_op op) +{ + switch (op) { + default: + ERROR("couldn't get operation for nir_intrinsic_op %u\n", op); + assert(false); + return OP_NOP; + } +} + operation Converter::preOperationNeeded(nir_op op) { @@ -426,6 +439,17 @@ Converter::getSubOp(nir_op op) } } +int +Converter::getSubOp(nir_intrinsic_op op) +{ + switch (op) { + default: + ERROR("couldn't get subop for nir_intrinsic_op %u\n", op); + assert(false); + return 0; + } +} + CondCode Converter::getCondCode(nir_op op) { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 24/34] nvir/nir: implement nir_instr_type_tex
a lot of those fields are not valid for a lot of tex ops. Not quite sure if it's worth the effort to check for those or just keep it like that. It seems to kind of work. v2: reworked offset handling add tex support with indirect R/S arguments handle GLSL_SAMPLER_DIM_EXTERNAL drop reference in convert(glsl_sampler_dim&, bool, bool) fix tg4 component selection v5: fill up coords args with scratch values if coords provided is less than TexTarget.getArgCount() Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 229 + 1 file changed, 229 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 6a43b764601..008beb9a02a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -82,6 +82,7 @@ private: typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap; typedef std::unordered_map<decltype(nir_block().index), BasicBlock*> NirBlockMap; + TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow); LValues& convert(nir_alu_dest *); BasicBlock* convert(nir_block *); LValues& convert(nir_dest *); @@ -124,6 +125,7 @@ private: DataType getSType(nir_src&, bool isFloat, bool isSigned); operation getOperation(nir_op); + operation getOperation(nir_texop); operation preOperationNeeded(nir_op); int getSubOp(nir_op); @@ -144,6 +146,10 @@ private: bool visit(nir_load_const_instr*); bool visit(nir_loop *); bool visit(nir_ssa_undef_instr *); + bool visit(nir_tex_instr *); + + /* tex stuff */ + Value* applyProjection(Value *src, Value *proj); nir_shader *nir; @@ -368,6 +374,36 @@ Converter::getOperation(nir_op op) } } +operation +Converter::getOperation(nir_texop op) +{ + switch (op) { + case nir_texop_tex: + return OP_TEX; + case nir_texop_lod: + return OP_TXLQ; + case nir_texop_txb: + return OP_TXB; + case nir_texop_txd: + return OP_TXD; + case 
nir_texop_txf: + case nir_texop_txf_ms: + return OP_TXF; + case nir_texop_tg4: + return OP_TXG; + case nir_texop_txl: + return OP_TXL; + case nir_texop_query_levels: + case nir_texop_texture_samples: + case nir_texop_txs: + return OP_TXQ; + default: + ERROR("couldn't get operation for nir_texop %u\n", op); + assert(false); + return OP_NOP; + } +} + operation Converter::preOperationNeeded(nir_op op) { @@ -1454,6 +1490,8 @@ Converter::visit(nir_instr *insn) return visit(nir_instr_as_load_const(insn)); case nir_instr_type_ssa_undef: return visit(nir_instr_as_ssa_undef(insn)); + case nir_instr_type_tex: + return visit(nir_instr_as_tex(insn)); default: ERROR("unknown nir_instr type %u\n", insn->type); return false; @@ -2124,6 +2162,197 @@ Converter::visit(nir_ssa_undef_instr *insn) return true; } +#define CASE_SAMPLER(ty) \ + case GLSL_SAMPLER_DIM_ ## ty : \ + if (isArray && !isShadow) \ + return TEX_TARGET_ ## ty ## _ARRAY; \ + else if (!isArray && isShadow) \ + return TEX_TARGET_## ty ## _SHADOW; \ + else if (isArray && isShadow) \ + return TEX_TARGET_## ty ## _ARRAY_SHADOW; \ + else \ + return TEX_TARGET_ ## ty + +TexTarget +Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow) +{ + switch (dim) { + CASE_SAMPLER(1D); + CASE_SAMPLER(2D); + CASE_SAMPLER(CUBE); + case GLSL_SAMPLER_DIM_3D: + return TEX_TARGET_3D; + case GLSL_SAMPLER_DIM_MS: + if (isArray) + return TEX_TARGET_2D_MS_ARRAY; + return TEX_TARGET_2D_MS; + case GLSL_SAMPLER_DIM_RECT: + if (isShadow) + return TEX_TARGET_RECT_SHADOW; + return TEX_TARGET_RECT; + case GLSL_SAMPLER_DIM_BUF: + return TEX_TARGET_BUFFER; + case GLSL_SAMPLER_DIM_EXTERNAL: + return TEX_TARGET_2D; + default: + ERROR("unknown glsl_sampler_dim %u\n", dim); + assert(false); + return TEX_TARGET_COUNT; + } +} +#undef CASE_SAMPLER + +Value* +Converter::applyProjection(Value *src, Value *proj) +{ + if (!proj) + return src; + return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj); +} + +bool +Converter::visit(nir_tex_instr 
*insn) +{ + switch (insn->op) { + case nir_texop_lod: + case nir_texop_query_levels: + case nir_texop_tex: + case nir_texop_texture_samples: + case nir_texop_tg4: + case nir_texop_txb: + case nir_texop_txd: + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_txl: + case nir_texop_txs: { + LValues = convert(>dest); + std::vector<Value*> srcs; + std::vector<Value*> defs; + std::vector<nir_src*> offsets; + uint8_t mask = 0; +
[Mesa-dev] [PATCH v6 17/34] nvir/nir: implement nir_alu_instr handling
Signed-off-by: Karol Herbst <kher...@redhat.com> v2: use bitfield_insert instead of bfi rework switch helper macros remove some lowering code (LoweringHelper is now used for this) v3: add pack_half_2x16_split add unpack_half_2x16_split_x/y v5: replace first argument with nullptr in loadImm calls prefer getSSA over getScratch Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 489 - 1 file changed, 488 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 3ba0285b411..3a81a3ca32e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -34,6 +34,31 @@ #include #include +#define CASE_OPFI(ni) \ + case nir_op_f ## ni : \ + case nir_op_i ## ni +#define CASE_OPFIU(ni) \ + case nir_op_f ## ni : \ + case nir_op_i ## ni : \ + case nir_op_u ## ni +#define CASE_OPIU(ni) \ + case nir_op_i ## ni : \ + case nir_op_u ## ni + +#define CASE_OPFI_RET(ni, val) \ + case nir_op_f ## ni : \ + case nir_op_i ## ni : \ + return val +#define CASE_OPFIU_RET(ni, val) \ + case nir_op_f ## ni : \ + case nir_op_i ## ni : \ + case nir_op_u ## ni : \ + return val +#define CASE_OPIU_RET(ni, val) \ + case nir_op_i ## ni : \ + case nir_op_u ## ni : \ + return val + static int type_size(const struct glsl_type *type) { @@ -97,9 +122,17 @@ private: std::vector getSTypes(nir_alu_instr*); DataType getSType(nir_src&, bool isFloat, bool isSigned); + operation getOperation(nir_op); + operation preOperationNeeded(nir_op); + + int getSubOp(nir_op); + + CondCode getCondCode(nir_op); + bool assignSlots(); bool parseNIR(); + bool visit(nir_alu_instr *); bool visit(nir_block *); bool visit(nir_cf_node *); bool visit(nir_function *); @@ -118,6 +151,7 @@ private: unsigned int curLoopDepth; BasicBlock *exit; + Value *zero; union { struct { @@ -129,7 +163,10 @@ private: 
Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) : ConverterCommon(prog, info), nir(nir), - curLoopDepth(0) {} + curLoopDepth(0) +{ + zero = mkImm((uint32_t)0); +} BasicBlock * Converter::convert(nir_block *block) @@ -245,6 +282,137 @@ Converter::getSType(nir_src , bool isFloat, bool isSigned) return typeOfSize(bitSize / 8, isFloat, isSigned); } +operation +Converter::getOperation(nir_op op) +{ + switch (op) { + // basic ops with float and int variants + CASE_OPFI_RET(abs, OP_ABS); + CASE_OPFI_RET(add, OP_ADD); + CASE_OPFI_RET(and, OP_AND); + CASE_OPFIU_RET(div, OP_DIV); + CASE_OPIU_RET(find_msb, OP_BFIND); + CASE_OPFIU_RET(max, OP_MAX); + CASE_OPFIU_RET(min, OP_MIN); + CASE_OPFIU_RET(mod, OP_MOD); + CASE_OPFI_RET(rem, OP_MOD); + CASE_OPFI_RET(mul, OP_MUL); + CASE_OPIU_RET(mul_high, OP_MUL); + CASE_OPFI_RET(neg, OP_NEG); + CASE_OPFI_RET(not, OP_NOT); + CASE_OPFI_RET(or, OP_OR); + CASE_OPFI_RET(eq, OP_SET); + CASE_OPFIU_RET(ge, OP_SET); + CASE_OPFIU_RET(lt, OP_SET); + CASE_OPFI_RET(ne, OP_SET); + CASE_OPIU_RET(shr, OP_SHR); + CASE_OPFI_RET(sub, OP_SUB); + CASE_OPFI_RET(xor, OP_XOR); + case nir_op_fceil: + return OP_CEIL; + case nir_op_fcos: + return OP_COS; + case nir_op_f2f32: + case nir_op_f2f64: + case nir_op_f2i32: + case nir_op_f2i64: + case nir_op_f2u32: + case nir_op_f2u64: + case nir_op_i2f32: + case nir_op_i2f64: + case nir_op_i2i32: + case nir_op_i2i64: + case nir_op_u2f32: + case nir_op_u2f64: + case nir_op_u2u32: + case nir_op_u2u64: + return OP_CVT; + case nir_op_fddx: + case nir_op_fddx_coarse: + case nir_op_fddx_fine: + return OP_DFDX; + case nir_op_fddy: + case nir_op_fddy_coarse: + case nir_op_fddy_fine: + return OP_DFDY; + case nir_op_fexp2: + return OP_EX2; + case nir_op_ffloor: + return OP_FLOOR; + case nir_op_ffma: + return OP_FMA; + case nir_op_flog2: + return OP_LG2; + case nir_op_pack_64_2x32_split: + return OP_MERGE; + case nir_op_frcp: + return OP_RCP; + case nir_op_frsq: + return OP_RSQ; + case nir_op_fsat: + 
return OP_SAT; + case nir_op_ishl: + return OP_SHL; + case nir_op_fsin: + return OP_SIN; + case nir_op_fsqrt: + return OP_SQRT; + case nir_op_ftrunc: + return OP_TRUNC; + default: + ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name); + assert(false); + return OP_NOP; + } +} + +operation +Converter::preOperationNeeded(nir_op op) +{ + switch (op) { + case nir_op_fcos: + case nir_op_fsin: + return OP_PRESIN; + default: + return OP_NOP; + } +} + +int +Converter::getSubOp(nir_op op) +{ + switch (op) { + CASE_OPIU_RET(mul_hi
[Mesa-dev] [PATCH v6 07/34] nvc0: add env var to make nir default
v2: allow for non debug builds as well v3: move reading out env var more global disable tg4 with multiple offsets with nir disable caps for 64 bit types v6: nv50 support disable MS images disable bindless textures Acked-by: Pierre Moreau <pierre.mor...@free.fr> Signed-off-by: Karol Herbst <kher...@redhat.com> --- src/gallium/drivers/nouveau/nouveau_screen.c | 5 + src/gallium/drivers/nouveau/nouveau_screen.h | 2 ++ src/gallium/drivers/nouveau/nv50/nv50_screen.c | 4 +++- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 19 +-- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index c144b39b2dd..2598c78a45b 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -176,9 +176,14 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) union nouveau_bo_config mm_config; char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG"); + char *use_nir = getenv("NV50_PROG_USE_NIR"); + if (nv_dbg) nouveau_mesa_debug = atoi(nv_dbg); + if (use_nir) + screen->prefer_nir = strtol(use_nir, NULL, 0) == 1; + /* These must be set before any failure is possible, as the cleanup * paths assume they're responsible for deleting them. 
*/ diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index e4fbae99ca4..1229b66b26f 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -62,6 +62,8 @@ struct nouveau_screen { struct disk_cache *disk_shader_cache; + bool prefer_nir; + #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS union { uint64_t v[29]; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index ce82c0e80f2..222199a38e4 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -310,6 +310,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader, enum pipe_shader_cap param) { + const struct nouveau_screen *screen = nouveau_screen(pscreen); + switch (shader) { case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: @@ -363,7 +365,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: return MIN2(16, PIPE_MAX_SAMPLERS); case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_TGSI; + return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 37fe173f6b6..3e00a044265 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -90,9 +90,11 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, if (bindings & PIPE_BIND_SHADER_IMAGE) { if (sample_count > 0 && - nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS) { + (nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS || + nouveau_screen(pscreen)->prefer_nir)) { /* MS images are currently unsupported on Maxwell because they have to * be handled explicitly. 
*/ + /* MS images are currently unsupported with NIR */ return false; } @@ -112,7 +114,8 @@ static int nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; - struct nouveau_device *dev = nouveau_screen(pscreen)->device; + const struct nouveau_screen *screen = nouveau_screen(pscreen); + struct nouveau_device *dev = screen->device; switch (param) { /* non-boolean caps */ @@ -216,7 +219,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: - case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: @@ -256,6 +258,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: return 1; + case PIPE_CAP_TEXTURE_GATHER_OFFSETS: + /* TODO: nir doesn't support tg4 with multiple offsets */ + return screen->prefer_nir ? 0 : 1; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(ps
[Mesa-dev] [PATCH v6 13/34] nvir/nir: parse NIR shader info
v2: parse a few more fields v3: add special handling for GL_ISOLINES Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 60 ++ 1 file changed, 60 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index d4432684b27..538c85b6a69 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -96,6 +96,7 @@ private: DataType getSType(nir_src&, bool isFloat, bool isSigned); bool assignSlots(); + bool parseNIR(); nir_shader *nir; @@ -984,6 +985,60 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op, } } +bool +Converter::parseNIR() +{ + info->io.clipDistances = nir->info.clip_distance_array_size; + info->io.cullDistances = nir->info.cull_distance_array_size; + + switch(prog->getType()) { + case Program::TYPE_COMPUTE: + info->prop.cp.numThreads[0] = nir->info.cs.local_size[0]; + info->prop.cp.numThreads[1] = nir->info.cs.local_size[1]; + info->prop.cp.numThreads[2] = nir->info.cs.local_size[2]; + info->bin.smemSize = nir->info.cs.shared_size; + break; + case Program::TYPE_FRAGMENT: + info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests; + info->prop.fp.persampleInvocation = + (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) || + (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS); + info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage; + info->prop.fp.usesDiscard = nir->info.fs.uses_discard; + info->prop.fp.usesSampleMaskIn = + !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN); + break; + case Program::TYPE_GEOMETRY: + info->prop.gp.inputPrim = nir->info.gs.input_primitive; + info->prop.gp.instanceCount = nir->info.gs.invocations; + info->prop.gp.maxVertices = nir->info.gs.vertices_out; + info->prop.gp.outputPrim = nir->info.gs.output_primitive; + break; + case 
Program::TYPE_TESSELLATION_CONTROL: + case Program::TYPE_TESSELLATION_EVAL: + if (nir->info.tess.primitive_mode == GL_ISOLINES) + info->prop.tp.domain = GL_LINES; + else + info->prop.tp.domain = nir->info.tess.primitive_mode; + info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out; + info->prop.tp.outputPrim = + nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES; + info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3; + info->prop.tp.winding = !nir->info.tess.ccw; + break; + case Program::TYPE_VERTEX: + info->prop.vp.usesDrawParameters = + (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) || + (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) || + (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)); + break; + default: + break; + } + + return true; +} + bool Converter::run() { @@ -1020,6 +1075,11 @@ Converter::run() if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) nir_print_shader(nir, stderr); + if (!parseNIR()) { + ERROR("Couldn't prase NIR!\n"); + return false; + } + if (!assignSlots()) { ERROR("Couldn't assign slots!\n"); return false; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev