Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>
On 10:20 pm - Jan 25 2017, Ilia Mirkin wrote:
> Many many many compute shaders only define a 1- or 2-dimensional block,
> but then continue to use system values that take the full 3d into
> account (like gl_LocalInvocationIndex, etc). So for the special case
> that a dimension is exactly 1, we know that the thread id along that
> axis will always be 0, so return it as such and allow constant folding
> to fix things up.
>
> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 6 +++++-
>  src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h | 2 +-
>  src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 10 ++++++++--
>  src/gallium/drivers/nouveau/codegen/nv50_ir_target.h | 4 +++-
>  4 files changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> index 186c9fd..b67a1dd 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> @@ -1179,7 +1179,11 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
>        info->prop.gp.instanceCount = 1;
>        info->prop.gp.maxVertices = 1;
>     }
> -   info->prop.cp.numThreads = 1;
> +   if (info->type == PIPE_SHADER_COMPUTE) {
> +      info->prop.cp.numThreads[0] =
> +      info->prop.cp.numThreads[1] =
> +      info->prop.cp.numThreads[2] = 1;
> +   }
>     info->io.pointSize = 0xff;
>     info->io.instanceId = 0xff;
>     info->io.vertexId = 0xff;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index 65d0904..e7d840d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -152,7 +152,7 @@ struct nv50_ir_prog_info
>           uint32_t inputOffset; /* base address for user args */
>           uint32_t sharedOffset; /* reserved space in s[] */
>           uint32_t gridInfoBase; /* base address for NTID,NCTAID */
> -         uint32_t numThreads; /* max number of threads */
> +         uint16_t numThreads[3]; /* max number of threads */
>        } cp;
>     } prop;
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index 6320e52..51f8b29 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -1047,7 +1047,6 @@ bool Source::scanSource()
>     }
>
>     info->io.viewportId = -1;
> -   info->prop.cp.numThreads = 1;
>
>     info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
>     info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
> @@ -1150,9 +1149,13 @@ void Source::scanProperty(const struct
> tgsi_full_property *prop)
>        info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but
> points */
>        break;
>     case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
> +      info->prop.cp.numThreads[0] = prop->u[0].Data;
> +      break;
>     case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
> +      info->prop.cp.numThreads[1] = prop->u[0].Data;
> +      break;
>     case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
> -      info->prop.cp.numThreads *= prop->u[0].Data;
> +      info->prop.cp.numThreads[2] = prop->u[0].Data;
>        break;
>     case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
>        info->io.clipDistances = prop->u[0].Data;
> @@ -1941,6 +1944,9 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src,
> int c, Value *ptr)
>        return ld->getDef(0);
>     case TGSI_FILE_SYSTEM_VALUE:
>        assert(!ptr);
> +      if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&
> +          info->prop.cp.numThreads[swz] == 1)
> +         return zero;
>        ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
>        ld->perPatch = info->sv[idx].patch;
>        return ld->getDef(0);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> index eaf50cc..e9d1057 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> @@ -174,7 +174,9 @@ public:
>     virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const
> = 0;
>
>     virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) {
> -      threads = info->prop.cp.numThreads;
> +      threads = info->prop.cp.numThreads[0] *
> +         info->prop.cp.numThreads[1] *
> +         info->prop.cp.numThreads[2];
>        if (threads == 0)
>           threads = info->target >= NVISA_GK104_CHIPSET ? 1024 : 512;
>     }
> --
> 2.10.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev