On 2017-12-03 — 15:36, Ilia Mirkin wrote: > On Sun, Dec 3, 2017 at 3:28 PM, Pierre Moreau <pierre.mor...@free.fr> wrote: > > The existing lowering code assumed the shift would not be an immediate > > but did not guard against it. However, in the constant folding pass, a > > multiplication by a power-of-2 immediate would get optimised into a > > shift-left instruction, with the shift value being an immediate. > > Given this support, it might make sense to allow immediates to be > propagated into 64-bit shl/shr's in target_nvc0.cpp::insnCanLoad(), if > it's not already there. > > And please add some piglit tests which shift using immediates so that > these might be better tested. > > Please make sure that the SM35+ path is also ready to have immediates > in the shift arg -- I'm not sure that the SHF.L op's emission was > written to fully handle that if it never happened in practice.
Will do that! I also realised this patch contains a few errors. > > > > > Signed-off-by: Pierre Moreau <pierre.mor...@free.fr> > > --- > > .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 57 > > +++++++++++++++------- > > 1 file changed, 40 insertions(+), 17 deletions(-) > > > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > > index 6b51b7607c..2b09caa737 100644 > > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > > @@ -199,27 +199,50 @@ NVC0LegalizeSSA::handleShift(Instruction *lo) > > // between the right/left cases. The main difference is swapping > > hi/lo > > // on input and output. > > > > - Value *x32_minus_shift, *pred, *hi1, *hi2; > > DataType type = isSignedIntType(lo->dType) ? TYPE_S32 : TYPE_U32; > > operation antiop = op == OP_SHR ? OP_SHL : OP_SHR; > > if (op == OP_SHR) > > std::swap(src[0], src[1]); > > - bld.mkOp2(OP_ADD, TYPE_U32, (x32_minus_shift = bld.getSSA()), shift, > > bld.mkImm(0x20)) > > - ->src(0).mod = Modifier(NV50_IR_MOD_NEG); > > - bld.mkCmp(OP_SET, CC_LE, TYPE_U8, (pred = bld.getSSA(1, > > FILE_PREDICATE)), > > - TYPE_U32, shift, bld.mkImm(32)); > > - // Compute HI (shift <= 32) > > - bld.mkOp2(OP_OR, TYPE_U32, (hi1 = bld.getSSA()), > > - bld.mkOp2v(op, TYPE_U32, bld.getSSA(), src[1], shift), > > - bld.mkOp2v(antiop, TYPE_U32, bld.getSSA(), src[0], > > x32_minus_shift)) > > - ->setPredicate(CC_P, pred); > > - // Compute LO (all shift values) > > - bld.mkOp2(op, type, (dst[0] = bld.getSSA()), src[0], shift); > > - // Compute HI (shift > 32) > > - bld.mkOp2(op, type, (hi2 = bld.getSSA()), src[0], > > - bld.mkOp1v(OP_NEG, TYPE_S32, bld.getSSA(), > > x32_minus_shift)) > > - ->setPredicate(CC_NOT_P, pred); > > - bld.mkOp2(OP_UNION, TYPE_U32, (dst[1] = bld.getSSA()), hi1, hi2); > > + > > + ImmediateValue *shiftImm = shift->asImm(); > > + if (shiftImm) { 
> > + if (shift->reg.data.u32 <= 32) { > > + Value *x32_minus_shift = bld.getSSA(); > > + x32_minus_shift->reg.data.u32 = 32u - shiftImm->reg.data.u32; > > + // Compute LO > > + bld.mkOp2(op, type, (dst[0] = bld.getSSA()), src[0], shiftImm); > > + // Compute HI > > + bld.mkOp2(OP_OR, TYPE_U32, (dst[1] = bld.getSSA()), > > + bld.mkOp2v(op, TYPE_U32, bld.getSSA(), src[1], > > shiftImm), > > + bld.mkOp2v(antiop, TYPE_U32, bld.getSSA(), src[0], > > x32_minus_shift)); > > + } else { > > + Value *shift_minus_x32 = bld.getSSA(); > > + shift_minus_x32->reg.data.u32 = 32u - shiftImm->reg.data.u32; > > + // Compute LO (shift >= 32, therefore filled with 0s) > > + bld.mkOp1(OP_MOV, type, (dst[0] = bld.getSSA()), > > bld.mkImm(0x0)); > > + // Compute HI > > + bld.mkOp2(op, type, (dst[1] = bld.getSSA()), src[0], > > shift_minus_x32); > > + } > > + } else { > > + Value *x32_minus_shift, *pred, *hi1, *hi2; > > + bld.mkOp2(OP_ADD, TYPE_U32, (x32_minus_shift = bld.getSSA()), > > shift, bld.mkImm(0x20)) > > + ->src(0).mod = Modifier(NV50_IR_MOD_NEG); > > + bld.mkCmp(OP_SET, CC_LE, TYPE_U8, (pred = bld.getSSA(1, > > FILE_PREDICATE)), > > + TYPE_U32, shift, bld.mkImm(32)); > > + // Compute HI (shift <= 32) > > + bld.mkOp2(OP_OR, TYPE_U32, (hi1 = bld.getSSA()), > > + bld.mkOp2v(op, TYPE_U32, bld.getSSA(), src[1], shift), > > + bld.mkOp2v(antiop, TYPE_U32, bld.getSSA(), src[0], > > x32_minus_shift)) > > + ->setPredicate(CC_P, pred); > > + // Compute LO (all shift values) > > + bld.mkOp2(op, type, (dst[0] = bld.getSSA()), src[0], shift); > > + // Compute HI (shift > 32) > > + bld.mkOp2(op, type, (hi2 = bld.getSSA()), src[0], > > + bld.mkOp1v(OP_NEG, TYPE_S32, bld.getSSA(), > > x32_minus_shift)) > > + ->setPredicate(CC_NOT_P, pred); > > + bld.mkOp2(OP_UNION, TYPE_U32, (dst[1] = bld.getSSA()), hi1, hi2); > > + } > > + > > if (op == OP_SHR) > > std::swap(dst[0], dst[1]); > > bld.mkOp2(OP_MERGE, TYPE_U64, dst64, dst[0], dst[1]); > > -- > > 2.15.0 > > > > 
_______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev