I've been having some fun with fragment shaders and found a lot of ops
missing. Someone might be interested in some more ops.
Fragment ops added:
- ABS
- CMP
- DP4
- DPH
- DST
- EX2
- FLR
- FRC
- LG2
- MAX
- MIN
- RCP
- SGE
- SLT
- XPD
Fragment ops still missing:
- COS
- KIL
- LIT
- SCS
- SIN
They all pass the basic tests in Mesa/progs/fp, though the Humus demos
(www.humus.ca) are still far from looking proper.
Now the questions:
- Missing commit from r300.sf.net:
When trying to implement the KIL op I found a commit by Ben Skeggs on
r300.sf.net that was lost in the Mesa tree:
http://sourceforge.net/mailarchive/forum.php?thread_id=7728162&forum_id=42268
At the very least the changes for r300_reg.h should be included in Mesa.
- What's with the DP3 op?
if (fpi->DstReg.WriteMask & WRITEMASK_W) {
/* I assume these need to share the same alu slot */
sync_streams(rp);
emit_arith(rp, PFS_OP_DP4, dest, WRITEMASK_W,
pfs_zero, pfs_zero, pfs_zero,
flags);
}
emit_arith(rp, PFS_OP_DP3, t_dst(rp, fpi->DstReg),
fpi->DstReg.WriteMask & WRITEMASK_XYZ,
t_src(rp, fpi->SrcReg[0]),
t_src(rp, fpi->SrcReg[1]),
pfs_zero, flags);
Why is DP4 called for W, and why does DP3 exclude W?
I don't see how this can conform to the spec:
tmp0 = VectorLoad(op0);
tmp1 = VectorLoad(op1);
dot = (tmp0.x * tmp1.x) + (tmp0.y * tmp1.y) + (tmp0.z * tmp1.z);
result.x = dot;
result.y = dot;
result.z = dot;
result.w = dot;
Index: r300_fragprog.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r300/r300_fragprog.c,v
retrieving revision 1.16
diff -u -r1.16 r300_fragprog.c
--- r300_fragprog.c 20 Nov 2005 17:52:40 -0000 1.16
+++ r300_fragprog.c 6 Jan 2006 15:47:45 -0000
@@ -763,7 +763,10 @@
switch (fpi->Opcode) {
case OPCODE_ABS:
- ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+ // test:
+ emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
fpi->DstReg.WriteMask,
+ t_src(rp,
fpi->SrcReg[0]), pfs_one, pfs_zero,
+ flags | PFS_FLAG_ABS);
break;
case OPCODE_ADD:
emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
fpi->DstReg.WriteMask,
@@ -773,6 +776,13 @@
flags);
break;
case OPCODE_CMP:
+ //test:
+ emit_arith(rp, PFS_OP_CMP, t_dst(rp, fpi->DstReg),
fpi->DstReg.WriteMask,
+ t_src(rp,
fpi->SrcReg[2]),
+ t_src(rp,
fpi->SrcReg[1]),
+ t_src(rp,
fpi->SrcReg[0]),
+ flags);
+ break;
case OPCODE_COS:
ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
break;
@@ -792,13 +802,108 @@
pfs_zero, flags);
break;
case OPCODE_DP4:
+ //test:
+ emit_arith(rp, PFS_OP_DP4, t_dst(rp, fpi->DstReg),
+ fpi->DstReg.WriteMask,
+ t_src(rp,
fpi->SrcReg[0]),
+ t_src(rp,
fpi->SrcReg[1]),
+ pfs_zero, flags);
+ break;
case OPCODE_DPH:
+ //test:
+ src0 = t_src(rp, fpi->SrcReg[0]);
+ src1 = t_src(rp, fpi->SrcReg[1]);
+ dest = t_dst(rp, fpi->DstReg);
+
+ temp = get_temp_reg(rp);
+
+ if (fpi->DstReg.WriteMask & WRITEMASK_W) {
+ /* I assume these need to share the same alu
slot */
+ sync_streams(rp);
+ emit_arith(rp, PFS_OP_DP4, temp, WRITEMASK_W,
+ pfs_zero,
pfs_zero, pfs_zero,
+ 0);
+ }
+ emit_arith(rp, PFS_OP_DP3, temp,
+ WRITEMASK_XYZ,
+ src0,
+ src1,
+ pfs_zero, 0);
+ emit_arith(rp, PFS_OP_MAD, dest, fpi->DstReg.WriteMask,
+ temp,
+ pfs_one,
+ swizzle(rp, src1,
SWIZZLE_W),
+ flags);
+ free_temp(rp, temp);
+ break;
case OPCODE_DST:
+ //test:
+ src0 = t_src(rp, fpi->SrcReg[0]);
+ src1 = t_src(rp, fpi->SrcReg[1]);
+
+ // result.x = 1.0;
+ // result.y = src0.y * src1.y;
+ // result.z = src0.z;
+ // result.w = src1.w;
+
+ temp = get_temp_reg(rp);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZ,
+ swizzle(rp, src0, MAKE_SWZ3(ZERO, Y,
Z)),
+ swizzle(rp, src1, MAKE_SWZ3(ZERO, Y,
ONE)),
+ swizzle(rp, src1, MAKE_SWZ3(ONE,ZERO,
ZERO)),
+ 0);
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+ pfs_zero,
+ pfs_zero,
+ src1,
+ 0);
+ emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
fpi->DstReg.WriteMask,
+ pfs_zero,
+ pfs_zero,
+ temp,
+ flags);
+
+ free_temp(rp, temp);
+ break;
case OPCODE_EX2:
+ //test:
+ emit_arith(rp, PFS_OP_EX2, t_dst(rp, fpi->DstReg),
+ fpi->DstReg.WriteMask,
+ t_src(rp,
fpi->SrcReg[0]), pfs_zero, pfs_zero,
+ flags | PFS_FLAG_ABS);
+ break;
case OPCODE_FLR:
+ //test:
+ src0 = t_src(rp, fpi->SrcReg[0]);
+ temp = get_temp_reg(rp);
+ emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_XYZW,
+ src0, pfs_zero,
pfs_zero,
+ flags);
+ emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
fpi->DstReg.WriteMask,
+ src0,
+ pfs_one,
+ negate(temp),
+ flags);
+ free_temp(rp, temp);
+ break;
case OPCODE_FRC:
+ //test:
+ emit_arith(rp, PFS_OP_FRC, t_dst(rp, fpi->DstReg),
+ fpi->DstReg.WriteMask,
+ t_src(rp,
fpi->SrcReg[0]), pfs_zero, pfs_zero,
+ flags);
+ break;
case OPCODE_KIL:
+ ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+ break;
case OPCODE_LG2:
+ //test:
+ emit_arith(rp, PFS_OP_LG2, t_dst(rp, fpi->DstReg),
+ fpi->DstReg.WriteMask,
+ t_src(rp,
fpi->SrcReg[0]), pfs_zero, pfs_zero,
+ flags);
+ break;
case OPCODE_LIT:
ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
break;
@@ -826,8 +931,20 @@
flags);
break;
case OPCODE_MAX:
+ //test:
+ emit_arith(rp, PFS_OP_MAX, t_dst(rp, fpi->DstReg),
+ fpi->DstReg.WriteMask,
+ t_src(rp,
fpi->SrcReg[0]),
+ t_src(rp,
fpi->SrcReg[1]),
+ pfs_zero, flags);
+ break;
case OPCODE_MIN:
- ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+ //test:
+ emit_arith(rp, PFS_OP_MIN, t_dst(rp, fpi->DstReg),
+ fpi->DstReg.WriteMask,
+ t_src(rp,
fpi->SrcReg[0]),
+ t_src(rp,
fpi->SrcReg[1]),
+ pfs_zero, flags);
break;
case OPCODE_MOV:
case OPCODE_SWZ:
@@ -857,7 +974,11 @@
free_temp(rp, temp);
break;
case OPCODE_RCP:
- ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+ //test:
+ emit_arith(rp, PFS_OP_RCP, t_dst(rp, fpi->DstReg),
+ fpi->DstReg.WriteMask,
+ t_src(rp,
fpi->SrcReg[0]), pfs_zero, pfs_zero,
+ flags);
break;
case OPCODE_RSQ:
emit_arith(rp, PFS_OP_RSQ, t_dst(rp, fpi->DstReg),
@@ -866,11 +987,49 @@
flags | PFS_FLAG_ABS);
break;
case OPCODE_SCS:
+ ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+ break;
case OPCODE_SGE:
+ //test:
+ src0 = t_src(rp, fpi->SrcReg[0]);
+ src1 = t_src(rp, fpi->SrcReg[1]);
+ temp = get_temp_reg(rp);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZW,
+ src0,
+ pfs_one,
+ negate(src1),
+ 0);
+ emit_arith(rp, PFS_OP_CMP, t_dst(rp, fpi->DstReg),
+ fpi->DstReg.WriteMask,
+ pfs_one,
+ pfs_zero,
+ temp,
+ flags);
+ free_temp(rp, temp);
+ break;
case OPCODE_SIN:
- case OPCODE_SLT:
ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
break;
+ case OPCODE_SLT:
+ //test:
+ src0 = t_src(rp, fpi->SrcReg[0]);
+ src1 = t_src(rp, fpi->SrcReg[1]);
+ temp = get_temp_reg(rp);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZW,
+ src0,
+ pfs_one,
+ negate(src1),
+ 0);
+ emit_arith(rp, PFS_OP_CMP, t_dst(rp, fpi->DstReg),
+ fpi->DstReg.WriteMask,
+ pfs_zero,
+ pfs_one,
+ temp,
+ flags);
+ free_temp(rp, temp);
+ break;
case OPCODE_SUB:
emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
fpi->DstReg.WriteMask,
t_src(rp,
fpi->SrcReg[0]),
@@ -888,7 +1047,24 @@
emit_tex(rp, fpi, R300_FPITX_OP_TXP);
break;
case OPCODE_XPD:
- ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+ //test:
+ src0 = t_src(rp, fpi->SrcReg[0]);
+ src1 = t_src(rp, fpi->SrcReg[1]);
+ dest = t_dst(rp, fpi->DstReg);
+
+ temp = get_temp_reg(rp);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZ,
+ swizzle(rp, src0,
MAKE_SWZ3(Z, X, Y)),
+ swizzle(rp, src1,
MAKE_SWZ3(Y, Z, X)),
+ pfs_zero,
+ 0);
+ emit_arith(rp, PFS_OP_MAD, dest, fpi->DstReg.WriteMask
& WRITEMASK_XYZ,
+ swizzle(rp, src0,
MAKE_SWZ3(Y, Z, X)),
+ swizzle(rp, src1,
MAKE_SWZ3(Z, X, Y)),
+ negate(temp),
+ flags);
+ free_temp(rp, temp);
break;
default:
ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);