Okay, I finally got past a stupid bug (as all bugs are...). So I committed the table to r300, and here is what swizzle & the modified emit_arith look like (there is some debug code to test swizzling)...
Note that i changed pfs_reg_t thus swizzling is done in emit arith and note in t_src. This way we can have multiple constant as arg for emit_arith and then swizzling alloc & copy const for us (have to add 7 native case to the table for that). If you think that i remove on important field in pfs_reg tell me. I am wondering if we can drop the valid field ? I haven't yet done indivual or global neg but as i said i think that the best solution is to first swizzle and then do a MAD t, -t, 1, 0 with appropriate write mask. Anyway once Keith commited your patch and you commited your change in r300, i will commit change to use table with individual neg support... Jerome Glisse typedef struct _pfs_reg_t { enum { REG_TYPE_INPUT, REG_TYPE_OUTPUT, REG_TYPE_TEMP, REG_TYPE_CONST } type:2; GLuint index:6; GLuint xyzw:12; GLuint negate:4; GLboolean has_w:1; GLboolean valid:1; } pfs_reg_t; GLuint swizzle( struct r300_fragment_program *rp, pfs_reg_t swz_src ) { GLuint src[3] = { 0, 0, 0 }; GLuint inst[4] = { 0, 0, 0, 0 }; GLuint i, xyz, w, j; pfs_reg_t tmp; switch (swz_src.type) { case REG_TYPE_INPUT: src[0] = rp->inputs[swz_src.index]; break; case REG_TYPE_TEMP: src[0] = rp->temps[swz_src.index]; src[0] = swz_src.index; rp->used_in_node |= (1 << src[0]); break; case REG_TYPE_CONST: src[0] = swz_src.index; break; default: ERROR("invalid source reg\n"); return 0; } /* Allocate temp reg for swizzling */ tmp = get_temp_reg(rp); src[1] = tmp.index; xyz = swz_src.xyzw & 511; w = (swz_src.xyzw >> 9) & 7; printf("w : %d\n",w); inst[2] = r300_swz_srca_mask[0][w] | (R300_FPI2_ARGA_ONE << R300_FPI2_ARG1A_SHIFT) | (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT) | R300_FPI0_OUTC_MAD; inst[3] = src[0] | R300_FPI3_SRC1A_CONST | R300_FPI3_SRC2A_CONST | (src[1] << R300_FPI3_DSTA_SHIFT); inst[3] |= R300_FPI3_DSTA_REG; for (i = 0; i < r300_swizzle[xyz].length; i++) { inst[0] = r300_swizzle[xyz].inst[(i << 1)]; inst[1] = r300_swizzle[xyz].inst[(i << 1) + 1]; inst[1] |= src[r300_swizzle[xyz].src[i]]; 
inst[1] |= src[1] << R300_FPI1_DSTC_SHIFT; rp->alu.inst[rp->v_pos].inst0 = inst[0]; rp->alu.inst[rp->v_pos].inst1 = inst[1]; rp->alu.inst[rp->s_pos].inst2 = inst[2]; rp->alu.inst[rp->s_pos].inst3 = inst[3]; rp->v_pos += 1; rp->s_pos += 1; j = rp->v_pos > rp->s_pos ? rp->v_pos : rp->s_pos; if (j > rp->alu.length) { rp->alu.length++; rp->node[rp->cur_node].alu_end++; } } return src[1]; } static void emit_arith( struct r300_fragment_program *rp, int op, pfs_reg_t dest, int mask, pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2, int flags ) { pfs_reg_t src[3] = { src0, src1, src2 }; int hwdest, hwsrc[3]; int argc; int v_idx = rp->v_pos, s_idx = rp->s_pos; GLuint inst[4] = { 0, 0, 0, 0 }; GLuint srcc_mask, srca_mask; int i; pfs_reg_t tt_reg = get_temp_reg(rp); GLuint tt_id = tt_reg.index; /* check opcode */ if (op > MAX_PFS_OP) { ERROR("unknown opcode!\n"); return; } argc = r300_fpop[op].argc; /* grab hwregs of sources */ for (i=0;i<argc;i++) { switch (src[i].type) { case REG_TYPE_INPUT: hwsrc[i] = rp->inputs[src[i].index]; break; case REG_TYPE_TEMP: hwsrc[i] = rp->temps[src[i].index]; rp->used_in_node |= (1 << hwsrc[i]); break; case REG_TYPE_CONST: hwsrc[i] = src[i].index; break; default: ERROR("invalid source reg\n"); return; } } /* grab hwregs of dest */ switch (dest.type) { case REG_TYPE_TEMP: hwdest = rp->temps[dest.index]; rp->used_in_node |= (1 << hwdest); break; case REG_TYPE_OUTPUT: hwdest = 0; break; default: ERROR("invalid dest reg type %d\n", dest.type); return; } for (i=0;i<3;i++) { if (i < argc) { #define GET_XYZ(u) ((u) & 511) #define GET_W(u) (((u) >> 9) & 7) if (0) { printf("------------------------------\n"); printf("zero a %d %d %d\n", i, GET_XYZ(pfs_zero.xyzw), GET_W(pfs_zero.xyzw)); printf("one a %d %d %d\n", i, GET_XYZ(pfs_one.xyzw), GET_W(pfs_one.xyzw)); printf("arith a %d %d %d\n", i, GET_XYZ(src[i].xyzw), GET_W(src[i].xyzw)); } srcc_mask=r300_swz_srcc_mask[i][GET_XYZ(src[i].xyzw)]; srca_mask=r300_swz_srca_mask[i][GET_W(src[i].xyzw)]; if (srcc_mask 
& 32) { /* swizzle */ hwsrc[i] = swizzle(rp, src[i]); inst[0] |= r300_swz_srcc_mask[i][136] << (i*7); inst[2] |= r300_swz_srca_mask[i][3] << (i*7); } else { /* native format lucky :) */ inst[0] |= srcc_mask << (i*7); inst[2] |= srca_mask << (i*7); if (src[i].type == REG_TYPE_CONST) { inst[1] |= (1<<5) << (i*6); inst[3] |= (1<<5) << (i*6); } } inst[1] |= hwsrc[i] << (i*6); inst[3] |= hwsrc[i] << (i*6); } else { /* read constant zero, may aswell use a ZERO swizzle aswell.. */ inst[0] |= R300_FPI0_ARGC_ZERO << (i*7); inst[2] |= R300_FPI2_ARGA_ZERO << (i*7); inst[1] |= (1<<5) << (i*6); inst[2] |= (1<<5) << (i*6); } } if (mask & 7) { rp->alu.inst[v_idx].inst0 = inst[0] | r300_fpop[op].v_op |flags; #if 1 rp->alu.inst[v_idx].inst1 = inst[1] | (tt_id << R300_FPI1_DSTC_SHIFT) | ((mask & WRITEMASK_XYZ) << 23); #else rp->alu.inst[v_idx].inst1 = inst[1] | (hwdest << R300_FPI1_DSTC_SHIFT) | ((mask & WRITEMASK_XYZ) << (dest.type == REG_TYPE_OUTPUT ? 26 : 23)); #endif rp->v_pos = v_idx + 1; } if (mask & 8) { rp->alu.inst[s_idx].inst2 = inst[2] | r300_fpop[op].s_op |flags; #if 1 rp->alu.inst[s_idx].inst3 = inst[3] | (tt_id << R300_FPI3_DSTA_SHIFT) | (1 << 23); #else rp->alu.inst[s_idx].inst3 = inst[3] | (hwdest << R300_FPI3_DSTA_SHIFT) | (1 << (dest.type == REG_TYPE_OUTPUT ? 24 : 23)); #endif rp->s_pos = s_idx + 1; } i = rp->v_pos > rp->s_pos ? 
rp->v_pos : rp->s_pos; if (i > rp->alu.length) { rp->alu.length++; rp->node[rp->cur_node].alu_end++; } #if 1 tt_reg.xyzw = (SWIZZLE_Z) | (SWIZZLE_Y << 3)| (SWIZZLE_X << 6)| (SWIZZLE_W << 9); tt_id = swizzle(rp, tt_reg); // tt_id = 0; v_idx = rp->v_pos; s_idx = rp->s_pos; printf("reg : %d\n",tt_id); inst[0] = r300_swz_srcc_mask[0][136] << (0*7); inst[2] = r300_swz_srca_mask[0][3] << (0*7); inst[0] |= r300_swz_srcc_mask[0][365] << (1*7); inst[2] |= r300_swz_srca_mask[0][5] << (1*7); inst[0] |= r300_swz_srcc_mask[0][292] << (2*7); inst[2] |= r300_swz_srca_mask[0][4] << (2*7); inst[1] = tt_id; inst[3] = tt_id; inst[1] |= (1<<5) << (1*6); inst[1] |= (1<<5) << (2*6); inst[3] |= (1<<5) << (1*6); inst[3] |= (1<<5) << (2*6); if (0) { inst[1] |= (1<<5); inst[3] |= (1<<5); } if (mask & 7) { inst[1] |= (hwdest << R300_FPI1_DSTC_SHIFT) | ((mask & WRITEMASK_XYZ) << (dest.type == REG_TYPE_OUTPUT ? 26 : 23)); rp->alu.inst[v_idx].inst0 = inst[0]; rp->alu.inst[v_idx].inst1 = inst[1]; rp->v_pos = v_idx + 1; } if (mask & 8) { inst[3] |= (hwdest << R300_FPI3_DSTA_SHIFT) | (1 << (dest.type == REG_TYPE_OUTPUT ? 24 : 23)); rp->alu.inst[s_idx].inst2 = inst[2]; rp->alu.inst[s_idx].inst3 = inst[3]; rp->s_pos = s_idx + 1; } i = rp->v_pos > rp->s_pos ? rp->v_pos : rp->s_pos; if (i > rp->alu.length) { rp->alu.length++; rp->node[rp->cur_node].alu_end++; } #endif return; } ------------------------------------------------------- This SF.Net email is sponsored by Oracle Space Sweepstakes Want to be the first software developer in space? Enter now for the Oracle Space Sweepstakes! http://ads.osdn.com/?ad_idt12&alloc_id344&op=click -- _______________________________________________ Dri-devel mailing list Dri-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/dri-devel