I've been having some fun with fragment shaders and found a lot of ops missing. Someone might be interested in some more ops.

Fragment ops added:
- ABS
- CMP
- DP4
- DPH
- DST
- EX2
- FLR
- FRC
- LG2
- MAX
- MIN
- RCP
- SGE
- SLT
- XPD

Fragment ops still missing:
- COS
- KIL
- LIT
- SCS
- SIN

They all pass the basic tests in Mesa/progs/fp, though the Humus demos (www.humus.ca) are still far from looking proper.

Now the Questions:

- Missing commit from r300.sf.net:
When trying to implement the KIL op I found a commit by Ben Skeggs on r300.sf.net that was lost in the Mesa tree:

http://sourceforge.net/mailarchive/forum.php?thread_id=7728162&forum_id=42268

At the very least the changes for r300_reg.h should be included in Mesa.

- What's with the DP3 op?

        if (fpi->DstReg.WriteMask & WRITEMASK_W) {
                /* I assume these need to share the same alu slot */
                sync_streams(rp);
                emit_arith(rp, PFS_OP_DP4, dest, WRITEMASK_W,
                        pfs_zero, pfs_zero, pfs_zero,
                        flags);
        }
        emit_arith(rp, PFS_OP_DP3, t_dst(rp, fpi->DstReg),
                fpi->DstReg.WriteMask & WRITEMASK_XYZ,
                t_src(rp, fpi->SrcReg[0]),
                t_src(rp, fpi->SrcReg[1]),
                pfs_zero, flags);

Why is DP4 called for W, and why does DP3 exclude W?
I don't see how it can conform to the spec:

      tmp0 = VectorLoad(op0);
      tmp1 = VectorLoad(op1);
      dot = (tmp0.x * tmp1.x) + (tmp0.y * tmp1.y) + (tmp0.z * tmp1.z);
      result.x = dot;
      result.y = dot;
      result.z = dot;
      result.w = dot;
Index: r300_fragprog.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r300/r300_fragprog.c,v
retrieving revision 1.16
diff -u -r1.16 r300_fragprog.c
--- r300_fragprog.c     20 Nov 2005 17:52:40 -0000      1.16
+++ r300_fragprog.c     6 Jan 2006 15:47:45 -0000
@@ -763,7 +763,10 @@
                
                switch (fpi->Opcode) {
                case OPCODE_ABS:
-                       ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+                       // test:
+                       emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), 
fpi->DstReg.WriteMask,
+                                                       t_src(rp, 
fpi->SrcReg[0]), pfs_one, pfs_zero, 
+                                                       flags | PFS_FLAG_ABS);
                        break;
                case OPCODE_ADD:
                        emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), 
fpi->DstReg.WriteMask,
@@ -773,6 +776,13 @@
                                                        flags);
                        break;
                case OPCODE_CMP:
+                       //test:
+                       emit_arith(rp, PFS_OP_CMP, t_dst(rp, fpi->DstReg), 
fpi->DstReg.WriteMask,
+                                                       t_src(rp, 
fpi->SrcReg[2]),
+                                                       t_src(rp, 
fpi->SrcReg[1]),
+                                                       t_src(rp, 
fpi->SrcReg[0]),
+                                                       flags);
+                       break;
                case OPCODE_COS:
                        ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
                        break;
@@ -792,13 +802,108 @@
                                                        pfs_zero, flags);
                        break;
                case OPCODE_DP4:
+                       //test:
+                       emit_arith(rp, PFS_OP_DP4, t_dst(rp, fpi->DstReg),
+                                                       fpi->DstReg.WriteMask,
+                                                       t_src(rp, 
fpi->SrcReg[0]),
+                                                       t_src(rp, 
fpi->SrcReg[1]),
+                                                       pfs_zero, flags);
+                       break;
                case OPCODE_DPH:
+                       //test:
+                       src0 = t_src(rp, fpi->SrcReg[0]);
+                       src1 = t_src(rp, fpi->SrcReg[1]);
+                       dest = t_dst(rp, fpi->DstReg);
+                       
+                       temp = get_temp_reg(rp);
+                       
+                       if (fpi->DstReg.WriteMask & WRITEMASK_W) {
+                               /* I assume these need to share the same alu 
slot */
+                               sync_streams(rp);
+                               emit_arith(rp, PFS_OP_DP4, temp, WRITEMASK_W, 
+                                                               pfs_zero, 
pfs_zero, pfs_zero,
+                                                               0);
+                       }
+                       emit_arith(rp, PFS_OP_DP3, temp,
+                                                       WRITEMASK_XYZ,
+                                                       src0,
+                                                       src1,
+                                                       pfs_zero, 0);
+                       emit_arith(rp, PFS_OP_MAD, dest, fpi->DstReg.WriteMask,
+                                                       temp,
+                                                       pfs_one,
+                                                       swizzle(rp, src1, 
SWIZZLE_W),
+                                                       flags);
+                       free_temp(rp, temp);
+                       break;
                case OPCODE_DST:
+                       //test:
+                       src0 = t_src(rp, fpi->SrcReg[0]);
+                       src1 = t_src(rp, fpi->SrcReg[1]);
+
+                       // result.x = 1.0;
+                       // result.y = src0.y * src1.y;
+                       // result.z = src0.z;
+                       // result.w = src1.w;
+
+                       temp = get_temp_reg(rp);
+                       
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZ,
+                                   swizzle(rp, src0, MAKE_SWZ3(ZERO,   Y,    
Z)),
+                                   swizzle(rp, src1, MAKE_SWZ3(ZERO,   Y,  
ONE)),
+                                   swizzle(rp, src1, MAKE_SWZ3(ONE,ZERO, 
ZERO)),
+                                   0);
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+                                   pfs_zero,
+                                   pfs_zero,
+                                   src1,
+                                   0);
+                       emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), 
fpi->DstReg.WriteMask,
+                                   pfs_zero,
+                                   pfs_zero,
+                                   temp,
+                                   flags);
+
+                       free_temp(rp, temp);
+                       break;
                case OPCODE_EX2:
+                       //test:
+                       emit_arith(rp, PFS_OP_EX2, t_dst(rp, fpi->DstReg),
+                                                       fpi->DstReg.WriteMask,
+                                                       t_src(rp, 
fpi->SrcReg[0]), pfs_zero, pfs_zero,
+                                                       flags | PFS_FLAG_ABS);
+                       break;
                case OPCODE_FLR:
+                       //test:
+                       src0 = t_src(rp, fpi->SrcReg[0]);
+                       temp = get_temp_reg(rp);
+                       emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_XYZW,
+                                                       src0, pfs_zero, 
pfs_zero,
+                                                       flags);
+                       emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), 
fpi->DstReg.WriteMask,
+                                                       src0,
+                                                       pfs_one,
+                                                       negate(temp),
+                                                       flags);
+                       free_temp(rp, temp);
+                       break;
                case OPCODE_FRC:
+                       //test:
+                       emit_arith(rp, PFS_OP_FRC, t_dst(rp, fpi->DstReg),
+                                                       fpi->DstReg.WriteMask,
+                                                       t_src(rp, 
fpi->SrcReg[0]), pfs_zero, pfs_zero,
+                                                       flags);
+                       break;
                case OPCODE_KIL:
+                       ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+                       break;
                case OPCODE_LG2:
+                       //test:
+                       emit_arith(rp, PFS_OP_LG2, t_dst(rp, fpi->DstReg),
+                                                       fpi->DstReg.WriteMask,
+                                                       t_src(rp, 
fpi->SrcReg[0]), pfs_zero, pfs_zero,
+                                                       flags);
+                       break;
                case OPCODE_LIT:
                        ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
                        break;
@@ -826,8 +931,20 @@
                                                        flags);
                        break;
                case OPCODE_MAX:
+                       //test:
+                       emit_arith(rp, PFS_OP_MAX, t_dst(rp, fpi->DstReg),
+                                                       fpi->DstReg.WriteMask,
+                                                       t_src(rp, 
fpi->SrcReg[0]),
+                                                       t_src(rp, 
fpi->SrcReg[1]),
+                                                       pfs_zero, flags);
+                       break;
                case OPCODE_MIN:
-                       ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+                       //test:
+                       emit_arith(rp, PFS_OP_MIN, t_dst(rp, fpi->DstReg),
+                                                       fpi->DstReg.WriteMask,
+                                                       t_src(rp, 
fpi->SrcReg[0]),
+                                                       t_src(rp, 
fpi->SrcReg[1]),
+                                                       pfs_zero, flags);
                        break;
                case OPCODE_MOV:
                case OPCODE_SWZ:
@@ -857,7 +974,11 @@
                        free_temp(rp, temp);
                        break;
                case OPCODE_RCP:
-                       ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+                       //test:
+                       emit_arith(rp, PFS_OP_RCP, t_dst(rp, fpi->DstReg),
+                                                       fpi->DstReg.WriteMask,
+                                                       t_src(rp, 
fpi->SrcReg[0]), pfs_zero, pfs_zero,
+                                                       flags);
                        break;
                case OPCODE_RSQ:
                        emit_arith(rp, PFS_OP_RSQ, t_dst(rp, fpi->DstReg),
@@ -866,11 +987,49 @@
                                                        flags | PFS_FLAG_ABS);
                        break;
                case OPCODE_SCS:
+                       ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+                       break;
                case OPCODE_SGE:
+                       //test:
+                       src0 = t_src(rp, fpi->SrcReg[0]);
+                       src1 = t_src(rp, fpi->SrcReg[1]);
+                       temp = get_temp_reg(rp);
+
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZW,
+                                   src0,
+                                   pfs_one,
+                                   negate(src1),
+                                   0);
+                       emit_arith(rp, PFS_OP_CMP, t_dst(rp, fpi->DstReg),
+                                   fpi->DstReg.WriteMask,
+                                   pfs_one,
+                                   pfs_zero,
+                                   temp,
+                                   flags);
+                       free_temp(rp, temp);
+                       break;
                case OPCODE_SIN:
-               case OPCODE_SLT:
                        ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
                        break;
+               case OPCODE_SLT:
+                       //test:
+                       src0 = t_src(rp, fpi->SrcReg[0]);
+                       src1 = t_src(rp, fpi->SrcReg[1]);
+                       temp = get_temp_reg(rp);
+
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZW,
+                                   src0,
+                                   pfs_one,
+                                   negate(src1),
+                                   0);
+                       emit_arith(rp, PFS_OP_CMP, t_dst(rp, fpi->DstReg),
+                                   fpi->DstReg.WriteMask,
+                                   pfs_zero,
+                                   pfs_one,
+                                   temp,
+                                   flags);
+                       free_temp(rp, temp);
+                       break;
                case OPCODE_SUB:
                        emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), 
fpi->DstReg.WriteMask,
                                                        t_src(rp, 
fpi->SrcReg[0]),
@@ -888,7 +1047,24 @@
                        emit_tex(rp, fpi, R300_FPITX_OP_TXP);
                        break;
                case OPCODE_XPD:
-                       ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+                       //test:
+                       src0 = t_src(rp, fpi->SrcReg[0]);
+                       src1 = t_src(rp, fpi->SrcReg[1]);
+                       dest = t_dst(rp, fpi->DstReg);
+                       
+                       temp = get_temp_reg(rp);
+                       
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZ,
+                                                       swizzle(rp, src0, 
MAKE_SWZ3(Z, X, Y)),
+                                                       swizzle(rp, src1, 
MAKE_SWZ3(Y, Z, X)),
+                                                       pfs_zero,
+                                                       0);
+                       emit_arith(rp, PFS_OP_MAD, dest, fpi->DstReg.WriteMask 
& WRITEMASK_XYZ,
+                                                       swizzle(rp, src0, 
MAKE_SWZ3(Y, Z, X)),
+                                                       swizzle(rp, src1, 
MAKE_SWZ3(Z, X, Y)),
+                                                       negate(temp),
+                                                       flags);
+                       free_temp(rp, temp);
                        break;
                default:
                        ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);

Reply via email to