2016-04-14 0:42 GMT+02:00 Rob Clark <[email protected]>: > From: Russell King <[email protected]> > > Add support for lowering FLR and CEIL to FRC/SUB and FRC/ADD > instructions for GPUs that support FRC but not FLR or CEIL. Since > these lowerings use FRC, it is invalid to ask for FLR or CEIL to be lowered > along with FRC, so add an assert to catch this invalid configuration. > > We also need to deal with FLR instructions emitted by the lowering > code. Fix these up with the FRC+SUB equivalent when FLR lowering is > enabled. > > Signed-off-by: Russell King <[email protected]> > Reviewed-by: Rob Clark <[email protected]> > --- > src/gallium/auxiliary/tgsi/tgsi_lowering.c | 167 > +++++++++++++++++++++++++---- > src/gallium/auxiliary/tgsi/tgsi_lowering.h | 2 + > 2 files changed, 149 insertions(+), 20 deletions(-) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c > b/src/gallium/auxiliary/tgsi/tgsi_lowering.c > index 0ffd855..b2dd37e 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c > @@ -676,14 +676,19 @@ transform_lit(struct tgsi_transform_context *tctx, > * dst.w = 1.0 > * > * ; needs: 1 tmp, imm{1.0} > - * FLR tmpA.x, src.x > + * if (lowering FLR) { > + * FRC tmpA.x, src.x > + * SUB tmpA.x, src.x, tmpA.x > + * } else { > + * FLR tmpA.x, src.x > + * } > * EX2 tmpA.y, src.x > * SUB dst.y, src.x, tmpA.x > * EX2 dst.x, tmpA.x > * MOV dst.z, tmpA.y > * MOV dst.w, imm{1.0} > */ > -#define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \ > +#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \ > NINST(1)+ NINST(1) - OINST(1)) > #define EXP_TMP 1 > static void > @@ -696,14 +701,35 @@ transform_exp(struct tgsi_transform_context *tctx, > struct tgsi_full_instruction new_inst; > > if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { > - /* FLR tmpA.x, src.x */ > - new_inst = tgsi_default_full_instruction(); > - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; > -
new_inst.Instruction.NumDstRegs = 1; > - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); > - new_inst.Instruction.NumSrcRegs = 1; > - reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); > - tctx->emit_instruction(tctx, &new_inst); > + if (ctx->config->lower_FLR) { > + /* FRC tmpA.x, src.x */ > + new_inst = tgsi_default_full_instruction(); > + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; > + new_inst.Instruction.NumDstRegs = 1; > + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); > + new_inst.Instruction.NumSrcRegs = 1; > + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); > + tctx->emit_instruction(tctx, &new_inst); > + > + /* SUB tmpA.x, src.x, tmpA.x */ > + new_inst = tgsi_default_full_instruction(); > + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; > + new_inst.Instruction.NumDstRegs = 1; > + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); > + new_inst.Instruction.NumSrcRegs = 2; > + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); > + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); > + tctx->emit_instruction(tctx, &new_inst); > + } else { > + /* FLR tmpA.x, src.x */ > + new_inst = tgsi_default_full_instruction(); > + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; > + new_inst.Instruction.NumDstRegs = 1; > + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); > + new_inst.Instruction.NumSrcRegs = 1; > + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); > + tctx->emit_instruction(tctx, &new_inst); > + } > } > > if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { > @@ -771,14 +797,19 @@ transform_exp(struct tgsi_transform_context *tctx, > * > * ; needs: 1 tmp, imm{1.0} > * LG2 tmpA.x, |src.x| > - * FLR tmpA.y, tmpA.x > + * if (lowering FLR) { > + * FRC tmpA.y, tmpA.x > + * SUB tmpA.y, tmpA.x, tmpA.y > + * } else { > + * FLR tmpA.y, tmpA.x > + * } > * EX2 tmpA.z, tmpA.y > * RCP tmpA.z, tmpA.z > * MUL dst.y, |src.x|, tmpA.z > * MOV dst.xz, tmpA.yx > * MOV dst.w, imm{1.0} > */ > -#define LOG_GROW 
(NINST(1) + NINST(1) + NINST(1) + NINST(1) + \ > +#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ > NINST(2) + NINST(1) + NINST(1) - OINST(1)) > #define LOG_TMP 1 > static void > @@ -803,14 +834,35 @@ transform_log(struct tgsi_transform_context *tctx, > } > > if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { > - /* FLR tmpA.y, tmpA.x */ > - new_inst = tgsi_default_full_instruction(); > - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; > - new_inst.Instruction.NumDstRegs = 1; > - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); > - new_inst.Instruction.NumSrcRegs = 1; > - reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); > - tctx->emit_instruction(tctx, &new_inst); > + if (ctx->config->lower_FLR) { > + /* FRC tmpA.y, tmpA.x */ > + new_inst = tgsi_default_full_instruction(); > + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; > + new_inst.Instruction.NumDstRegs = 1; > + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); > + new_inst.Instruction.NumSrcRegs = 1; > + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); > + tctx->emit_instruction(tctx, &new_inst); > + > + /* SUB tmpA.y, tmpA.x, tmpA.y */ > + new_inst = tgsi_default_full_instruction(); > + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; > + new_inst.Instruction.NumDstRegs = 1; > + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); > + new_inst.Instruction.NumSrcRegs = 2; > + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); > + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); > + tctx->emit_instruction(tctx, &new_inst); > + } else { > + /* FLR tmpA.y, tmpA.x */ > + new_inst = tgsi_default_full_instruction(); > + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; > + new_inst.Instruction.NumDstRegs = 1; > + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); > + new_inst.Instruction.NumSrcRegs = 1; > + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); > + 
tctx->emit_instruction(tctx, &new_inst); > + } > } > > if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { > @@ -1005,6 +1057,58 @@ transform_dotp(struct tgsi_transform_context *tctx, > } > } > > +/* FLR - floor, CEIL - ceil > + * ; needs: 1 tmp > + * if (CEIL) { > + * FRC tmpA, -src > + * ADD dst, src, tmpA > + * } else { > + * FRC tmpA, src > + * SUB dst, src, tmpA > + * } > + */ > +#define FLR_GROW (NINST(1) + NINST(2) - OINST(1)) > +#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1)) > +#define FLR_TMP 1 > +#define CEIL_TMP 1 > +static void > +transform_flr_ceil(struct tgsi_transform_context *tctx, > + struct tgsi_full_instruction *inst) > +{ > + struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); > + struct tgsi_full_dst_register *dst = &inst->Dst[0]; > + struct tgsi_full_src_register *src0 = &inst->Src[0]; > + struct tgsi_full_instruction new_inst; > + unsigned opcode = inst->Instruction.Opcode; > + > + if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { > + /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */ > + new_inst = tgsi_default_full_instruction(); > + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; > + new_inst.Instruction.NumDstRegs = 1; > + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); > + new_inst.Instruction.NumSrcRegs = 1; > + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); > + > + if (opcode == TGSI_OPCODE_CEIL) > + new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate; > + tctx->emit_instruction(tctx, &new_inst); > + > + /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ > + new_inst = tgsi_default_full_instruction(); > + if (opcode == TGSI_OPCODE_CEIL) > + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; > + else > + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; > + new_inst.Instruction.NumDstRegs = 1; > + reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); > + new_inst.Instruction.NumSrcRegs = 2; > + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); > + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, 
SWIZ(X, Y, Z, W)); > + tctx->emit_instruction(tctx, &new_inst); > + } > +} > + > /* Inserts a MOV_SAT for the needed components of tex coord. Note that > * in the case of TXP, the clamping must happen *after* projection, so > * we need to lower TXP to TEX. > @@ -1401,6 +1505,16 @@ transform_instr(struct tgsi_transform_context *tctx, > goto skip; > transform_dotp(tctx, inst); > break; > + case TGSI_OPCODE_FLR: > + if (!ctx->config->lower_FLR) > + goto skip; > + transform_flr_ceil(tctx, inst); > + break; > + case TGSI_OPCODE_CEIL: > + if (!ctx->config->lower_CEIL) > + goto skip; > + transform_flr_ceil(tctx, inst); > + break; > case TGSI_OPCODE_TEX: > case TGSI_OPCODE_TXP: > case TGSI_OPCODE_TXB: > @@ -1432,6 +1546,9 @@ tgsi_transform_lowering(const struct > tgsi_lowering_config *config, > /* sanity check in case limit is ever increased: */ > STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); > > + /* sanity check the lowering */ > + assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL))); > + > memset(&ctx, 0, sizeof(ctx)); > ctx.base.transform_instruction = transform_instr; > ctx.info = info; > @@ -1473,6 +1590,8 @@ tgsi_transform_lowering(const struct > tgsi_lowering_config *config, > OPCS(DPH) || > OPCS(DP2) || > OPCS(DP2A) || > + OPCS(FLR) || > + OPCS(CEIL) || > OPCS(TXP) || > ctx.two_side_colors || > ctx.saturate)) > @@ -1541,6 +1660,14 @@ tgsi_transform_lowering(const struct > tgsi_lowering_config *config, > newlen += DP2A_GROW * OPCS(DP2A); > numtmp = MAX2(numtmp, DOTP_TMP); > } > + if (OPCS(FLR)) { > + newlen += FLR_GROW * OPCS(FLR); > + numtmp = MAX2(numtmp, FLR_TMP); > + } > + if (OPCS(CEIL)) { > + newlen += CEIL_GROW * OPCS(CEIL); > + numtmp = MAX2(numtmp, CEIL_TMP); > + } > if (ctx.saturate || config->lower_TXP) { > int n = 0; > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h > b/src/gallium/auxiliary/tgsi/tgsi_lowering.h > index 52c204f..a96d85d 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h > +++ 
b/src/gallium/auxiliary/tgsi/tgsi_lowering.h > @@ -68,6 +68,8 @@ struct tgsi_lowering_config > unsigned lower_DPH:1; > unsigned lower_DP2:1; > unsigned lower_DP2A:1; > + unsigned lower_FLR:1; > + unsigned lower_CEIL:1; > > /* bitmask of (1 << TGSI_TEXTURE_type): */ > unsigned lower_TXP; > -- > 2.5.5 >
The series is Reviewed-by: Christian Gmeiner <[email protected]> _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
