The existing 64-bit shift lowering assumed the shift amount would never be an immediate, but did not guard against it. However, the constant folding pass optimises a multiplication by a power-of-two immediate into a shift-left whose shift amount is an immediate, and that shift then reaches this lowering. Handle immediate shift amounts explicitly: the amount is known at compile time, so the predicated selection between the shift <= 32 and shift > 32 sequences can be dropped and only the applicable sequence emitted.
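For context, here is an illustration that is not part of the patch: the 32-bit split emitted for a 64-bit left shift by a known constant c can be written out in plain C++. shl64_via_32 is a hypothetical helper name. The sketch sends c == 32 to its second case to stay within defined C++ behaviour, whereas the lowering groups c == 32 with its first case and leaves that boundary to the hardware shift semantics.

#include <cassert>
#include <cstdint>
#include <utility>

// Hypothetical helper, illustration only: split a 64-bit left shift by a
// compile-time constant c (0 <= c < 64) into 32-bit operations, mirroring
// the two immediate cases of the lowering.
static std::pair<uint32_t, uint32_t>               // {lo, hi}
shl64_via_32(uint32_t lo, uint32_t hi, unsigned c)
{
   if (c == 0)
      return {lo, hi};
   if (c < 32)                                     // low bits spill into the high word
      return {lo << c, (hi << c) | (lo >> (32 - c))};
   return {0u, lo << (c - 32)};                    // low word becomes 0, high word
                                                   // comes from the old low word
}

int main()
{
   const uint64_t x = 0x1234abcd9876ef01ull;
   for (unsigned c = 0; c < 64; ++c) {
      const auto r = shl64_via_32(uint32_t(x), uint32_t(x >> 32), c);
      assert(((uint64_t(r.second) << 32) | r.first) == (x << c));
   }
   return 0;
}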
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 61 ++++++++++++++++------
 1 file changed, 44 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 6b51b7607c..2b09caa737 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -199,27 +199,54 @@ NVC0LegalizeSSA::handleShift(Instruction *lo)
    // between the right/left cases. The main difference is swapping hi/lo
    // on input and output.
 
-   Value *x32_minus_shift, *pred, *hi1, *hi2;
    DataType type = isSignedIntType(lo->dType) ? TYPE_S32 : TYPE_U32;
    operation antiop = op == OP_SHR ? OP_SHL : OP_SHR;
    if (op == OP_SHR)
       std::swap(src[0], src[1]);
-   bld.mkOp2(OP_ADD, TYPE_U32, (x32_minus_shift = bld.getSSA()), shift, bld.mkImm(0x20))
-      ->src(0).mod = Modifier(NV50_IR_MOD_NEG);
-   bld.mkCmp(OP_SET, CC_LE, TYPE_U8, (pred = bld.getSSA(1, FILE_PREDICATE)),
-             TYPE_U32, shift, bld.mkImm(32));
-   // Compute HI (shift <= 32)
-   bld.mkOp2(OP_OR, TYPE_U32, (hi1 = bld.getSSA()),
-             bld.mkOp2v(op, TYPE_U32, bld.getSSA(), src[1], shift),
-             bld.mkOp2v(antiop, TYPE_U32, bld.getSSA(), src[0], x32_minus_shift))
-      ->setPredicate(CC_P, pred);
-   // Compute LO (all shift values)
-   bld.mkOp2(op, type, (dst[0] = bld.getSSA()), src[0], shift);
-   // Compute HI (shift > 32)
-   bld.mkOp2(op, type, (hi2 = bld.getSSA()), src[0],
-             bld.mkOp1v(OP_NEG, TYPE_S32, bld.getSSA(), x32_minus_shift))
-      ->setPredicate(CC_NOT_P, pred);
-   bld.mkOp2(OP_UNION, TYPE_U32, (dst[1] = bld.getSSA()), hi1, hi2);
+
+   ImmediateValue *shiftImm = shift->asImm();
+   if (shiftImm) {
+      // The shift amount is known at compile time: pick one of the two
+      // sequences directly instead of predicating between them.
+      if (shiftImm->reg.data.u32 <= 32) {
+         Value *x32_minus_shift = bld.mkImm(32 - shiftImm->reg.data.u32);
+         // Compute LO
+         bld.mkOp2(op, type, (dst[0] = bld.getSSA()), src[0], shiftImm);
+         // Compute HI
+         bld.mkOp2(OP_OR, TYPE_U32, (dst[1] = bld.getSSA()),
+                   bld.mkOp2v(op, TYPE_U32, bld.getSSA(), src[1], shiftImm),
+                   bld.mkOp2v(antiop, TYPE_U32, bld.getSSA(), src[0], x32_minus_shift));
+      } else {
+         Value *shift_minus_x32 = bld.mkImm(shiftImm->reg.data.u32 - 32);
+         // Compute LO (shift > 32: filled with 0s, or with sign bits for a
+         // signed right shift)
+         if (op == OP_SHR && type == TYPE_S32)
+            bld.mkOp2(op, type, (dst[0] = bld.getSSA()), src[0], bld.mkImm(31));
+         else
+            bld.mkOp1(OP_MOV, type, (dst[0] = bld.getSSA()), bld.mkImm(0x0));
+         // Compute HI
+         bld.mkOp2(op, type, (dst[1] = bld.getSSA()), src[0], shift_minus_x32);
+      }
+   } else {
+      Value *x32_minus_shift, *pred, *hi1, *hi2;
+      bld.mkOp2(OP_ADD, TYPE_U32, (x32_minus_shift = bld.getSSA()), shift, bld.mkImm(0x20))
+         ->src(0).mod = Modifier(NV50_IR_MOD_NEG);
+      bld.mkCmp(OP_SET, CC_LE, TYPE_U8, (pred = bld.getSSA(1, FILE_PREDICATE)),
+                TYPE_U32, shift, bld.mkImm(32));
+      // Compute HI (shift <= 32)
+      bld.mkOp2(OP_OR, TYPE_U32, (hi1 = bld.getSSA()),
+                bld.mkOp2v(op, TYPE_U32, bld.getSSA(), src[1], shift),
+                bld.mkOp2v(antiop, TYPE_U32, bld.getSSA(), src[0], x32_minus_shift))
+         ->setPredicate(CC_P, pred);
+      // Compute LO (all shift values)
+      bld.mkOp2(op, type, (dst[0] = bld.getSSA()), src[0], shift);
+      // Compute HI (shift > 32)
+      bld.mkOp2(op, type, (hi2 = bld.getSSA()), src[0],
+                bld.mkOp1v(OP_NEG, TYPE_S32, bld.getSSA(), x32_minus_shift))
+         ->setPredicate(CC_NOT_P, pred);
+      bld.mkOp2(OP_UNION, TYPE_U32, (dst[1] = bld.getSSA()), hi1, hi2);
+   }
+
    if (op == OP_SHR)
       std::swap(dst[0], dst[1]);
    bld.mkOp2(OP_MERGE, TYPE_U64, dst64, dst[0], dst[1]);
-- 
2.15.0
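Companion illustration, again not from the patch (shr64_via_32 is a hypothetical name): the right-shift split is the mirror image of the left-shift one, with the low and high words swapped on input and output, which is exactly what handleShift achieves with std::swap for OP_SHR. A signed right shift by more than 32 additionally needs the high word sign-filled rather than zeroed, which is the OP_SHR/TYPE_S32 case in the immediate path above.

#include <cassert>
#include <cstdint>
#include <utility>

// Hypothetical helper, illustration only: unsigned 64-bit right shift by a
// compile-time constant c (0 <= c < 64), built from 32-bit operations.
static std::pair<uint32_t, uint32_t>               // {lo, hi}
shr64_via_32(uint32_t lo, uint32_t hi, unsigned c)
{
   if (c == 0)
      return {lo, hi};
   if (c < 32)                                     // high bits spill into the low word
      return {(lo >> c) | (hi << (32 - c)), hi >> c};
   return {hi >> (c - 32), 0u};                    // high word becomes 0, low word
                                                   // comes from the old high word
}

int main()
{
   const uint64_t x = 0xfedc4321a5a5c3c3ull;
   for (unsigned c = 0; c < 64; ++c) {
      const auto r = shr64_via_32(uint32_t(x), uint32_t(x >> 32), c);
      assert(((uint64_t(r.second) << 32) | r.first) == (x >> c));
   }
   return 0;
}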