--- src/mesa/Makefile.sources | 1 + src/mesa/state_tracker/st_atifs_to_tgsi.c | 798 ++++++++++++++++++++++++++++++ src/mesa/state_tracker/st_atifs_to_tgsi.h | 49 ++ src/mesa/state_tracker/st_atom_constbuf.c | 14 + src/mesa/state_tracker/st_cb_drawpixels.c | 1 + src/mesa/state_tracker/st_cb_program.c | 35 +- src/mesa/state_tracker/st_program.c | 22 + src/mesa/state_tracker/st_program.h | 1 + 8 files changed, 920 insertions(+), 1 deletion(-) create mode 100644 src/mesa/state_tracker/st_atifs_to_tgsi.c create mode 100644 src/mesa/state_tracker/st_atifs_to_tgsi.h
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index ed9848c..a8e645d 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -390,6 +390,7 @@ VBO_FILES = \ vbo/vbo_split_inplace.c STATETRACKER_FILES = \ + state_tracker/st_atifs_to_tgsi.c \ state_tracker/st_atom_array.c \ state_tracker/st_atom_blend.c \ state_tracker/st_atom.c \ diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.c b/src/mesa/state_tracker/st_atifs_to_tgsi.c new file mode 100644 index 0000000..1d704cb --- /dev/null +++ b/src/mesa/state_tracker/st_atifs_to_tgsi.c @@ -0,0 +1,798 @@ + +#include "main/mtypes.h" +#include "main/atifragshader.h" +#include "main/texobj.h" +#include "main/errors.h" +#include "program/prog_parameter.h" + +#include "tgsi/tgsi_ureg.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "st_program.h" +#include "st_atifs_to_tgsi.h" + +/** + * Intermediate state used during shader translation. + */ +struct st_translate { + struct ureg_program *ureg; + struct gl_context *ctx; + struct ati_fragment_shader *atifs; + + struct ureg_dst temps[MAX_PROGRAM_TEMPS]; + struct ureg_src *constants; + struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; + struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; + struct ureg_dst address[1]; + struct ureg_src samplers[PIPE_MAX_SAMPLERS]; + struct ureg_src systemValues[SYSTEM_VALUE_MAX]; + + const GLuint *inputMapping; + const GLuint *outputMapping; + + /* Keep a record of the tgsi instruction number that each mesa + * instruction starts at, will be used to fix up labels after + * translation. 
+ */ + unsigned *insn; + unsigned insn_size; + unsigned insn_count; + + unsigned current_pass; + + bool regs_written[MAX_NUM_PASSES_ATI][MAX_NUM_FRAGMENT_REGISTERS_ATI]; + + boolean error; +}; + +struct instruction_desc { + unsigned TGSI_opcode; + const char *name; + unsigned char arg_count; + unsigned char special; /* no 1:1 corresponding TGSI instruction */ +}; + +/* index this array as inst_desc[ATI_opcode-GL_MOV_ATI] */ +static struct instruction_desc inst_desc[] = { + {TGSI_OPCODE_MOV, "MOV", 1, 0}, + {TGSI_OPCODE_NOP, "UND", 0, 0}, /* unused */ + {TGSI_OPCODE_ADD, "ADD", 2, 0}, + {TGSI_OPCODE_MUL, "MUL", 2, 0}, + {TGSI_OPCODE_SUB, "SUB", 2, 0}, + {TGSI_OPCODE_DP3, "DOT3", 2, 0}, + {TGSI_OPCODE_DP4, "DOT4", 2, 0}, + {TGSI_OPCODE_MAD, "MAD", 3, 0}, + {TGSI_OPCODE_LRP, "LERP", 3, 0}, + {TGSI_OPCODE_NOP, "CND", 3, 1}, + {TGSI_OPCODE_NOP, "CND0", 3, 2}, + {TGSI_OPCODE_NOP, "DOT2_ADD", 3, 3} +}; + +/** + * Called prior to emitting the TGSI code for each Mesa instruction. + * Allocate additional space for instructions if needed. + * Update the insn[] array so the next Mesa instruction points to + * the next TGSI instruction. 
+ * Copied from st_mesa_to_tgsi.c + */ +static void set_insn_start(struct st_translate *t, + unsigned start) +{ + if (t->insn_count + 1 >= t->insn_size) { + t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); + t->insn = realloc(t->insn, t->insn_size * sizeof t->insn[0]); + if (t->insn == NULL) { + t->error = TRUE; + return; + } + } + + t->insn[t->insn_count++] = start; +} + +static void emit_insn(struct st_translate *t, + unsigned opcode, + const struct ureg_dst *dst, + unsigned nr_dst, + const struct ureg_src *src, + unsigned nr_src) +{ + set_insn_start(t, ureg_get_instruction_number(t->ureg)); + ureg_insn(t->ureg, opcode, dst, nr_dst, src, nr_src); +} + +static struct ureg_dst get_temp(struct st_translate *t, unsigned index) +{ + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary(t->ureg); + return t->temps[index]; +} + +static struct ureg_src apply_swizzle(struct st_translate *t, + struct ureg_src src, GLuint swizzle) +{ + if (swizzle == GL_SWIZZLE_STR_ATI) { + return src; + } else if (swizzle == GL_SWIZZLE_STQ_ATI) { + return ureg_swizzle(src, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z); + } else { + struct ureg_dst tmp[2]; + struct ureg_src imm[3]; + + tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); + tmp[1] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI+1); + imm[0] = src; + imm[1] = ureg_imm4f(t->ureg, 1.0, 1.0, 0.0, 0.0); + imm[2] = ureg_imm4f(t->ureg, 0.0, 0.0, 1.0, 1.0); + emit_insn(t, TGSI_OPCODE_MAD, &tmp[0], 1, imm, 3); + + if (swizzle == GL_SWIZZLE_STR_DR_ATI) + imm[0] = ureg_swizzle(src, + TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z); + else + imm[0] = ureg_swizzle(src, + TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); + emit_insn(t, TGSI_OPCODE_RCP, &tmp[1], 1, &imm[0], 1); + + imm[0] = ureg_src(tmp[0]); + imm[1] = ureg_src(tmp[1]); + emit_insn(t, TGSI_OPCODE_MUL, &tmp[0], 1, imm, 2); + + return ureg_src(tmp[0]); + } +} + +/** + * Map mesa texture target 
to TGSI texture target. + * copied from st_mesa_to_tgsi.c + */ +static unsigned translate_texture_target(GLuint textarget) +{ + /* the shadow part is omitted */ + switch( textarget ) { + case TEXTURE_2D_MULTISAMPLE_INDEX: return TGSI_TEXTURE_2D_MSAA; + case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: return TGSI_TEXTURE_2D_ARRAY_MSAA; + case TEXTURE_BUFFER_INDEX: return TGSI_TEXTURE_BUFFER; + case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D; + case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D; + case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D; + case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE; + case TEXTURE_CUBE_ARRAY_INDEX: return TGSI_TEXTURE_CUBE_ARRAY; + case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT; + case TEXTURE_1D_ARRAY_INDEX: return TGSI_TEXTURE_1D_ARRAY; + case TEXTURE_2D_ARRAY_INDEX: return TGSI_TEXTURE_2D_ARRAY; + case TEXTURE_EXTERNAL_INDEX: return TGSI_TEXTURE_2D; + default: + debug_assert( 0 ); + return TGSI_TEXTURE_1D; + } +} + +static +gl_texture_index get_texture_target_index(struct gl_context *ctx, + const unsigned r) +{ + struct gl_texture_object *texObj = ctx->Texture.Unit[r]._Current; + if (texObj) { + //TODO TargetIndex should be used here, but that's not always set + return _mesa_tex_target_to_index(ctx, texObj->Target); + } else { + /* fallback for missing texture */ + //_mesa_debug(0, "texture %u is missing\n", r); + return TEXTURE_2D_INDEX; + } +} + +static struct ureg_src get_source(struct st_translate *t, GLuint src_type) +{ + //_mesa_debug(0, "source type %u\n", src_type); + if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) { + //TODO why are registers defined up to 31 in glext.h? + if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI]) + return ureg_src(get_temp(t, src_type - GL_REG_0_ATI)); + else + return ureg_imm1f(t->ureg, 0.0); + } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) { + //TODO why are constants defined up to 31 in glext.h? 
+ return t->constants[src_type - GL_CON_0_ATI]; + } else if (src_type == GL_ZERO) { + return ureg_imm1f(t->ureg, 0.0); + } else if (src_type == GL_ONE) { + return ureg_imm1f(t->ureg, 1.0); + } else if (src_type == GL_PRIMARY_COLOR_ARB) { + return t->inputs[t->inputMapping[VARYING_SLOT_COL0]]; + } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) { + return t->inputs[t->inputMapping[VARYING_SLOT_COL1]]; + } else { + assert(!"unknown source"); + return ureg_imm1f(t->ureg, 0.0); + } +} + +static struct ureg_src prepare_argument(struct st_translate *t, const unsigned argId, + const struct atifragshader_src_register *srcReg) +{ + struct ureg_src src = get_source(t, srcReg->Index); + struct ureg_dst arg = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI+argId); + + switch (srcReg->argRep) { + case GL_NONE: + break; + case GL_RED: + src = ureg_swizzle(src, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + break; + case GL_GREEN: + src = ureg_swizzle(src, + TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y); + break; + case GL_BLUE: + src = ureg_swizzle(src, + TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z); + break; + case GL_ALPHA: + src = ureg_swizzle(src, + TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); + break; + } + emit_insn(t, TGSI_OPCODE_MOV, &arg, 1, &src, 1); + + if (srcReg->argMod & GL_COMP_BIT_ATI) { + struct ureg_src modsrc[2]; + modsrc[0] = ureg_imm1f(t->ureg, 1.0); + modsrc[1] = ureg_src(arg); + + emit_insn(t, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2); + } + if (srcReg->argMod & GL_BIAS_BIT_ATI) { + struct ureg_src modsrc[2]; + modsrc[0] = ureg_src(arg); + modsrc[1] = ureg_imm1f(t->ureg, 0.5); + + emit_insn(t, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2); + } + if (srcReg->argMod & GL_2X_BIT_ATI) { + struct ureg_src modsrc[2]; + modsrc[0] = ureg_src(arg); + modsrc[1] = ureg_imm1f(t->ureg, 2.0); + + emit_insn(t, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2); + } + if (srcReg->argMod & GL_NEGATE_BIT_ATI) { + struct 
ureg_src modsrc[2];
+      modsrc[0] = ureg_src(arg);
+      modsrc[1] = ureg_imm1f(t->ureg, -1.0);
+
+      emit_insn(t, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2);
+   }
+   return ureg_src(arg);
+}
+
+/* These instructions have no direct equivalent in TGSI */
+static void emit_special_inst(struct st_translate *t, struct instruction_desc *desc,
+      struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
+{
+   struct ureg_dst tmp[1];
+   struct ureg_src src[3];
+
+   if (desc->special == 1) {
+      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI+2); // re-purpose a3
+      src[0] = ureg_imm1f(t->ureg, 0.5f);
+      src[1] = args[2];
+      emit_insn(t, TGSI_OPCODE_SLT, tmp, 1, src, 2);
+      src[0] = ureg_src(tmp[0]);
+      src[1] = args[0];
+      src[2] = args[1];
+      emit_insn(t, TGSI_OPCODE_LRP, dst, 1, src, 3);
+   } else if (desc->special == 2) {
+      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI+2); // re-purpose a3
+      src[0] = args[2];
+      src[1] = ureg_imm1f(t->ureg, 0.0f);
+      emit_insn(t, TGSI_OPCODE_SGE, tmp, 1, src, 2);
+      src[0] = ureg_src(tmp[0]);
+      src[1] = args[0];
+      src[2] = args[1];
+      emit_insn(t, TGSI_OPCODE_LRP, dst, 1, src, 3);
+   } else if (desc->special == 3) {
+      src[0] = args[0];
+      src[1] = args[1];
+      src[2] = ureg_swizzle(args[2],
+            TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+      emit_insn(t, TGSI_OPCODE_DP2A, dst, 1, src, 3);
+   }
+}
+
+static void emit_arith_inst(struct st_translate *t,
+      struct instruction_desc *desc,
+      struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
+{
+   if (desc->special) {
+      return emit_special_inst(t, desc, dst, args, argcount);
+   }
+
+   emit_insn(t, desc->TGSI_opcode, dst, 1, args, argcount);
+}
+
+static void emit_dstmod(struct st_translate *t,
+      struct ureg_dst dst, GLuint dstMod)
+{
+   float imm = 1.0f; /* scale used when only GL_SATURATE_BIT_ATI is set */
+   struct ureg_src src[3];
+
+   if (dstMod == GL_NONE) {
+      return;
+   }
+
+   if (dstMod & GL_2X_BIT_ATI) {
+      imm = 2.0f;
+   } else if (dstMod & GL_4X_BIT_ATI) {
+      imm = 4.0f;
+   } else if (dstMod & GL_8X_BIT_ATI) {
+      imm = 8.0f;
+   } else if (dstMod & GL_HALF_BIT_ATI) {
+      imm = 0.5f;
+   } else if (dstMod & GL_QUARTER_BIT_ATI) {
+      imm = 0.25f;
+   } else if (dstMod & GL_EIGHTH_BIT_ATI) {
+      imm = 0.125f;
+   }
+   /* Always emit the MUL: a saturate-only modifier has no scale bit, so it
+    * multiplies by 1.0 and relies on the saturated destination to clamp. */
+   src[0] = ureg_src(dst);
+   src[1] = ureg_imm1f(t->ureg, imm);
+   if (dstMod & GL_SATURATE_BIT_ATI) {
+      dst = ureg_saturate(dst);
+   }
+   emit_insn(t, TGSI_OPCODE_MUL, &dst, 1, src, 2);
+}
+
+/**
+ * Compile one setup instruction to TGSI instructions.
+ */
+static void compile_setupinst(struct gl_context *ctx,
+      struct st_translate *t,
+      const unsigned r,
+      const struct atifs_setupinst *texinst)
+{
+   struct ureg_dst dst[1];
+   struct ureg_src src[2];
+
+   if (!texinst->Opcode)
+      return;
+
+   dst[0] = get_temp(t, r);
+
+   GLuint pass_tex = texinst->src;
+
+   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
+      unsigned attr = pass_tex - GL_TEXTURE0_ARB + VARYING_SLOT_TEX0;
+      src[0] = t->inputs[t->inputMapping[attr]];
+   } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
+      unsigned reg = pass_tex - GL_REG_0_ATI;
+      /* the frontend already validated that REG is only allowed in second pass */
+      if (t->regs_written[0][reg]) {
+         src[0] = ureg_src(t->temps[reg]);
+      } else {
+         src[0] = ureg_imm1f(t->ureg, 0.0);
+      }
+   }
+   src[0] = apply_swizzle(t, src[0], texinst->swizzle);
+
+   set_insn_start(t, ureg_get_instruction_number(t->ureg));
+   if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
+      /* use the current texture target for the sample operation
+       * note: this implementation doesn't support re-using an ATI_fs
+       * with different texture targets
+       */
+      gl_texture_index index = get_texture_target_index(ctx, r);
+      unsigned target = translate_texture_target(index);
+      //_mesa_debug(0, "texture %u index %u target %u\n", r, index, target);
+
+      // by default texture and sampler indexes are the same
+      //_mesa_debug(0, "using sampler %u\n", r);
+      src[1] = t->samplers[r];
+      ureg_tex_insn(t->ureg, TGSI_OPCODE_TEX, dst, 1, target,
+            NULL, 0, src, 2);
+   } else if (texinst->Opcode ==
ATI_FRAGMENT_SHADER_PASS_OP) { + ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1); + } + + t->regs_written[t->current_pass][r] = true; +} + +/** + * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions. + */ +static void compile_instruction(struct st_translate *t, + const struct atifs_instruction *inst) +{ + unsigned optype; + + for (optype=0; optype<2; optype++) { // color, alpha + struct instruction_desc *desc; + struct ureg_dst dst[1]; + struct ureg_src args[3]; // arguments for the main operation + unsigned arg; + unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI; + + if (!inst->Opcode[optype]) + continue; + + desc = &inst_desc[inst->Opcode[optype]-GL_MOV_ATI]; + + /* prepare the arguments */ + for (arg=0; arg<desc->arg_count; arg++) { + if (arg >= inst->ArgCount[optype]) { + _mesa_debug(0, "warning: using 0 for missing argument %d of %s\n", + arg, desc->name); + args[arg] = ureg_imm1f(t->ureg, 0.0); + } else { + args[arg] = prepare_argument(t, arg, + &inst->SrcReg[optype][arg]); + } + } + + /* prepare dst */ + dst[0] = get_temp(t, dstreg); + + if (optype) + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_W); + else { + GLuint dstMask = inst->DstReg[optype].dstMask; + if (dstMask == GL_NONE) + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ); + else + dst[0] = ureg_writemask(dst[0], dstMask); /* the enum values match */ + } + + /* emit the main instruction */ + emit_arith_inst(t, desc, dst, args, arg); + + emit_dstmod(t, *dst, inst->DstReg[optype].dstMod); + + t->regs_written[t->current_pass][dstreg] = true; + } +} + +/*static void add_tint(struct st_translate *t, float r, float g, float b) +{ + struct ureg_dst dst[1]; + struct ureg_src src[2]; + dst[0] = t->temps[0]; + src[0] = ureg_src(t->temps[0]); + src[1] = ureg_imm4f(t->ureg, r, g, b, 1.0); + emit_insn(t, TGSI_OPCODE_MUL, dst, 1, src, 2); +}*/ + +static void apply_fog(struct st_translate *t) +{ + struct gl_fog_attrib *fog = &t->ctx->Fog; + struct ureg_src oparams = 
t->constants[8];
+   struct ureg_src fogcolor = t->constants[9];
+   /* this is a single float in the X coordinate */
+   struct ureg_src fogcoord = t->inputs[t->inputMapping[VARYING_SLOT_FOGC]];
+
+   struct ureg_dst dst[1];
+   struct ureg_src src[3];
+
+   /* Detect ATIfs 8 of KotOR, and render black in this pass
+    * to get correct fog on the ground in Kashyyyk Shadowlands */
+   if (t->atifs->NumPasses == 1) {
+      struct atifs_instruction *firstinst = &t->atifs->Instructions[0][0];
+      if (firstinst->Opcode[0] == GL_MOV_ATI
+            && firstinst->SrcReg[0][0].Index == GL_PRIMARY_COLOR_ARB
+            && firstinst->DstReg[0].Index == GL_REG_0_ATI) {
+         _mesa_debug(0, "Skipping this pass\n");
+         dst[0] = t->temps[0];
+         src[0] = ureg_imm1f(t->ureg, 0.0f);
+         emit_insn(t, TGSI_OPCODE_MOV, dst, 1, src, 1);
+         return;
+      }
+   }
+
+   _mesa_debug(0, "Adding fog\n");
+
+   /* compute the 1 component fog factor f */
+   if (fog->Mode == GL_LINEAR) {
+      /* formula: f = (end - z) / (end - start)
+       * with optimized parameters: f = MAD(fogcoord, oparams.x, oparams.y)
+       */
+      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
+      src[0] = fogcoord;
+      src[1] = ureg_swizzle(oparams,
+            TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+      src[2] = ureg_swizzle(oparams,
+            TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y);
+
+      emit_insn(t, TGSI_OPCODE_MAD, dst, 1, src, 3);
+   } else if (fog->Mode == GL_EXP) {
+      /* formula: f = exp(-dens * z)
+       * with optimized parameters:
+       *    f = MUL(fogcoord, oparams.z); f= EX2(-f)
+       */
+      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
+      src[0] = fogcoord;
+      src[1] = ureg_swizzle(oparams,
+            TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+      emit_insn(t, TGSI_OPCODE_MUL, dst, 1, src, 2);
+      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
+      src[0] = ureg_negate(ureg_src(dst[0]));
+      emit_insn(t, TGSI_OPCODE_EX2, dst, 1, src, 1);
+   } else if (fog->Mode == GL_EXP2) {
+      /* formula: f = exp(-(dens * z)^2)
+       * with optimized parameters:
+       *    f = MUL(fogcoord, oparams.w); f=MUL(f, f); f= EX2(-f)
+       */
+      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
+      src[0] = fogcoord;
+      src[1] = ureg_swizzle(oparams,
+            TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
+      emit_insn(t, TGSI_OPCODE_MUL, dst, 1, src, 2);
+      src[0] = ureg_src(dst[0]);
+      src[1] = ureg_src(dst[0]); /* square f: src[1] must not stay oparams.w */
+      emit_insn(t, TGSI_OPCODE_MUL, dst, 1, src, 2);
+      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
+      src[0] = ureg_negate(ureg_src(dst[0]));
+      emit_insn(t, TGSI_OPCODE_EX2, dst, 1, src, 1);
+   }
+
+   //TODO do we need f = CLAMP(f)? ff_fs does this, programopt doesn't
+   dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
+   src[0] = ureg_src(dst[0]);
+   src[1] = ureg_imm1f(t->ureg, 0.0f);
+   src[2] = ureg_imm1f(t->ureg, 1.0f);
+   emit_insn(t, TGSI_OPCODE_CLAMP, dst, 1, src, 3);
+
+   /* REG0=LRP(f.xxxy, REG0, fogcolor) */
+   //TODO ff_fs uses a series of arithmetic instead of lerp
+   dst[0] = t->temps[0];
+   src[0] = ureg_swizzle(ureg_src(get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI)),
+         TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y);
+   src[1] = ureg_src(t->temps[0]);
+   src[2] = fogcolor;
+   emit_insn(t, TGSI_OPCODE_LRP, dst, 1, src, 3);
+}
+
+static void finalize_shader(struct st_translate *t, unsigned numPasses)
+{
+   struct ureg_dst dst[1] = { { 0 } };
+   struct ureg_src src[1] = { { 0 } };
+   if (t->regs_written[numPasses-1][0]) {
+      if (t->ctx->Fog.Enabled)
+         apply_fog(t);
+
+      /*add_tint(t, 1, 0, 1);*/
+
+      /* copy the result into the OUT slot */
+      dst[0] = t->outputs[t->outputMapping[FRAG_RESULT_COLOR]];
+      src[0] = ureg_src(t->temps[0]);
+      emit_insn(t, TGSI_OPCODE_MOV, dst, 1, src, 1);
+   }
+
+   /* signal the end of the program */
+   emit_insn(t, TGSI_OPCODE_END, dst, 0, src, 0);
+}
+
+/**
+ * Called when a new variant is needed, we need to translate the ATI fragment shader
+ * to TGSI
+ */
+enum pipe_error
+st_translate_atifs_program(
+   struct gl_context *ctx,
+   uint procType,
+   struct
ureg_program *ureg, + struct ati_fragment_shader *atifs, + struct gl_program *program, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[], + boolean passthrough_edgeflags, + boolean clamp_color) +{ + enum pipe_error ret = PIPE_OK; + + unsigned pass, i, r; + + struct st_translate translate, *t; + t = &translate; + memset(t, 0, sizeof *t); + + t->inputMapping = inputMapping; + t->outputMapping = outputMapping; + t->ureg = ureg; + t->ctx = ctx; + t->atifs = atifs; + + _mesa_debug(0, "Compiling ATI fragment shader %u\n", atifs->Id); + + /* + * Declare input attributes. + */ + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_fs_input(ureg, + inputSemanticName[i], + inputSemanticIndex[i], + interpMode[i]); + } + + /* + * Declare output attributes: + * we always have numOutputs=1 and it's FRAG_RESULT_COLOR + */ + // TODO assert(numOutputs==1 && outputSemanticName[0]==TGSI_SEMANTIC_COLOR && outputSemanticIndex[0]==0); ? + t->outputs[0] = ureg_DECL_output( ureg, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[0] ); + + /* Emit constants and immediates. Mesa uses a single index space + * for these, so we put all the translated regs in t->constants. 
+ */ + if (program->Parameters) { + t->constants = calloc( program->Parameters->NumParameters, + sizeof t->constants[0] ); + if (t->constants == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + + for (i = 0; i < program->Parameters->NumParameters; i++) { + switch (program->Parameters->Parameters[i].Type) { + case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: + t->constants[i] = ureg_DECL_constant( ureg, i ); + break; + + case PROGRAM_CONSTANT: + t->constants[i] = + ureg_DECL_immediate( ureg, + (const float*)program->Parameters->ParameterValues[i], + 4 ); + break; + default: + break; + } + } + } + + /* texture samplers */ + for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) { + if (program->SamplersUsed & (1 << i)) { + //TODO here, if texture is undef, we need to cancel the reference + // and later convert the sample instruction into MOV(reg, 0) (?) + t->samplers[i] = ureg_DECL_sampler( ureg, i ); + + /* fix texture targets that are not 2D */ + /* note: this implementation doesn't support re-using an ATI_fs + * with different texture targets + */ + gl_texture_index index = get_texture_target_index(ctx, i); + if (index != TEXTURE_2D_INDEX) { + _mesa_debug(0, "Fixing tex target %u to %u\n", i, index); + program->TexturesUsed[i] &= ~TEXTURE_2D_BIT; + program->TexturesUsed[i] |= (1 << index); + } else { + _mesa_debug(0, "Tex target %u stays TEXTURE_2D\n", i); + } + } + } + + /* emit instructions */ + for (pass=0; pass<atifs->NumPasses; pass++) { + t->current_pass = pass; + for (r=0; r<MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) { + struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r]; + compile_setupinst(ctx, t, r, texinst); + } + for (i=0; i<atifs->numArithInstr[pass]; i++) { + struct atifs_instruction *inst = &atifs->Instructions[pass][i]; + compile_instruction(t, inst); + } + } + + finalize_shader(t, atifs->NumPasses); + +out: + free(t->insn); + free(t->constants); + + if (t->error) { + debug_printf("%s: translate error flag 
set\n", __func__);
+   }
+   _mesa_debug(0, "ATI fragment shader is compiled\n");
+
+   return ret;
+}
+
+/**
+ * Called in ProgramStringNotify, we need to fill the metadata of the
+ * gl_program attached to the ati_fragment_shader
+ */
+void
+st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog)
+{
+   /* we know this is st_fragment_program, because of st_new_ati_fs() */
+   struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
+   struct ati_fragment_shader *atifs = stfp->ati_fs;
+
+   unsigned pass, i, r, optype, arg;
+
+   static const gl_state_index fog_params_state[STATE_LENGTH] =
+      {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED, 0, 0, 0};
+   static const gl_state_index fog_color[STATE_LENGTH] =
+      {STATE_FOG_COLOR, 0, 0, 0, 0};
+
+   _mesa_debug(0, "Initializing the gl_program of ATI fs\n");
+
+   prog->InputsRead = 0;
+   prog->OutputsWritten = BITFIELD64_BIT(FRAG_RESULT_COLOR);
+   prog->SamplersUsed = 0;
+   prog->Parameters = _mesa_new_parameter_list();
+
+   /* fill in InputsRead, SamplersUsed, TexturesUsed */
+   for (pass=0; pass<atifs->NumPasses; pass++) {
+      for (r=0; r<MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
+         struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
+         GLuint pass_tex = texinst->src;
+
+         if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
+            /* mark which texcoords are used; the source may also be a REG_n */
+            if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB)
+               prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
+            /* by default there is 1:1 mapping between samplers and textures */
+            prog->SamplersUsed |= (1 << r);
+            /* the target is unknown here in glEndFragmentShaderATI(), it is
+             * fixed up during compiling, which happens in the draw call */
+            prog->TexturesUsed[r] |= TEXTURE_2D_BIT;
+         } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
+            if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
+               prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
+            }
+         }
+      }
+   }
+   for (pass=0; pass<atifs->NumPasses; pass++) {
+      for (i=0; i<atifs->numArithInstr[pass]; i++) {
+         struct atifs_instruction *inst = &atifs->Instructions[pass][i];
+         for (optype=0; optype<2; optype++) { // color, alpha
+            if (inst->Opcode[optype]) {
+               for (arg=0; arg<inst->ArgCount[optype]; arg++) {
+                  GLint index = inst->SrcReg[optype][arg].Index;
+                  if (index == GL_PRIMARY_COLOR_EXT) {
+                     prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_COL0);
+                  } else if (index == GL_SECONDARY_INTERPOLATOR_ATI) {
+                     /* note: ATI_fragment_shader.txt never specifies what
+                      * GL_SECONDARY_INTERPOLATOR_ATI is, swrast uses
+                      * VARYING_SLOT_COL1 for this input */
+                     prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_COL1);
+                  }
+               }
+            }
+         }
+      }
+   }
+   /* We may need fog */
+   prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_FOGC);
+
+   /* we always have 8 ATI_fs constants, and the fog params */
+   for (i=0; i<8; i++) {
+      //TODO check the return value of this
+      _mesa_add_parameter(prog->Parameters, PROGRAM_UNIFORM,
+            NULL, 4, GL_FLOAT, NULL, NULL);
+   }
+   //TODO check the return value of these, too
+   _mesa_add_state_reference(prog->Parameters, fog_params_state);
+   _mesa_add_state_reference(prog->Parameters, fog_color);
+
+   prog->NumInstructions = 0;
+   prog->NumTemporaries = 6+3; // 6 registers, 3 input temps for arith ops
+   prog->NumParameters = 8+2; // the number of constants + state variables
+}
diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.h b/src/mesa/state_tracker/st_atifs_to_tgsi.h
new file mode 100644
index 0000000..f7e04e9
--- /dev/null
+++ b/src/mesa/state_tracker/st_atifs_to_tgsi.h
@@ -0,0 +1,49 @@
+//TODO copyright header
+
+
+#ifndef ST_ATIFS_TO_TGSI_H
+#define ST_ATIFS_TO_TGSI_H
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+#include "main/glheader.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+
+struct gl_context;
+struct gl_program;
+struct tgsi_token;
+struct ureg_program;
+
+enum pipe_error
+st_translate_atifs_program(
+   struct gl_context *ctx,
+   uint procType,
+   struct
ati_fragment_shader *atifs,
+   struct gl_program *program,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[],
+   boolean passthrough_edgeflags,
+   boolean clamp_color);
+
+
+void
+st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog);
+
+
+#if defined __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* ST_ATIFS_TO_TGSI_H */
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
index 6affb4d..3bd090f 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -63,6 +63,20 @@ void st_upload_constants( struct st_context *st,
           shader_type == PIPE_SHADER_TESS_CTRL ||
           shader_type == PIPE_SHADER_TESS_EVAL);
 
+   /* update the ATI constants before rendering; only the fragment stage's params hold them */
+   if (shader_type == PIPE_SHADER_FRAGMENT && params && st->fp->ati_fs) {
+      struct ati_fragment_shader *ati_fs = st->fp->ati_fs;
+      unsigned c;
+      for (c=0; c<8; c++) {
+         if (ati_fs->LocalConstDef & (1 << c))
+            memcpy(params->ParameterValues[c],
+                   ati_fs->Constants[c], sizeof(GLfloat) * 4);
+         else
+            memcpy(params->ParameterValues[c],
+                   st->ctx->ATIFragmentShader.GlobalConstants[c], sizeof(GLfloat) * 4);
+      }
+   }
+
    /* update constants */
    if (params && params->NumParameters) {
       struct pipe_constant_buffer cb;
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index b372697..bf764e5 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -1343,6 +1343,7 @@ blit_copy_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
        !ctx->FragmentProgram.Enabled &&
        !ctx->VertexProgram.Enabled &&
        !ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT] &&
+       !ctx->ATIFragmentShader._Enabled && //TODO is this needed?
ctx->DrawBuffer->_NumColorDrawBuffers == 1 && !ctx->Query.CondRenderQuery && !ctx->Query.CurrentOcclusionObject) { diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 3029909..dfe2542 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -35,6 +35,7 @@ #include "main/enums.h" #include "main/shaderapi.h" #include "program/prog_instruction.h" +#include "program/prog_parameter.h" #include "program/program.h" #include "cso_cache/cso_context.h" @@ -46,6 +47,7 @@ #include "st_mesa_to_tgsi.h" #include "st_cb_program.h" #include "st_glsl_to_tgsi.h" +#include "st_atifs_to_tgsi.h" @@ -272,6 +274,21 @@ st_program_string_notify( struct gl_context *ctx, if (st->tep == sttep) st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM; } + else if (target == GL_FRAGMENT_SHADER_ATI) { + assert(prog); + + struct st_fragment_program *stfp = (struct st_fragment_program *) prog; + assert(stfp->ati_fs); + assert(stfp->ati_fs->Program == prog); + + st_release_fp_variants(st, stfp); + + if (st->fp == stfp) + st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; + + /* now that we have the whole shader, we can set up the metadata in @prog */ + st_init_atifs_prog(ctx, prog); + } if (ST_DEBUG & DEBUG_PRECOMPILE) st_precompile_shader_variant(st, prog); @@ -280,6 +297,21 @@ st_program_string_notify( struct gl_context *ctx, return GL_TRUE; } +/** + * Called via ctx->Driver.NewATIfs() + * Called when a new ATI fragment shader is created with gl_bindFragmentShaderATI() + */ +static struct gl_program * +st_new_ati_fs( struct gl_context *ctx, + GLuint id) +{ + //TODO is this id safe to use? + struct gl_program *prog = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, id); + struct st_fragment_program *stfp = (struct st_fragment_program *)prog; + stfp->ati_fs = ctx->ATIFragmentShader.Current; + return prog; +} + /** * Plug in the program and shader-related device driver functions. 
@@ -293,6 +325,7 @@ st_init_program_functions(struct dd_function_table *functions) functions->DeleteProgram = st_delete_program; functions->IsProgramNative = st_is_program_native; functions->ProgramStringNotify = st_program_string_notify; - + functions->NewATIfs = st_new_ati_fs; + functions->LinkShader = st_link_shader; } diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index e62dd7a..2e79932 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -51,6 +51,7 @@ #include "st_context.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" +#include "st_atifs_to_tgsi.h" #include "cso_cache/cso_context.h" @@ -571,6 +572,9 @@ st_translate_fragment_program(struct st_context *st, assert(!(key->bitmap && key->drawpixels)); memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr)); + //TODO what about ATI_fs and glBitmap(), glDrawPixels()? + // the spec says nothing about those + if (key->bitmap) { /* glBitmap drawing */ struct gl_fragment_program *fp; /* we free this temp program below */ @@ -860,6 +864,24 @@ st_translate_fragment_program(struct st_context *st, fs_output_semantic_name, fs_output_semantic_index, FALSE, key->clamp_color ); + else if (stfp->ati_fs) + st_translate_atifs_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + stfp->ati_fs, + &stfp->Base.Base, + /* inputs */ + fs_num_inputs, + inputMapping, + input_semantic_name, + input_semantic_index, + interpMode, + /* outputs */ + fs_num_outputs, + outputMapping, + fs_output_semantic_name, + fs_output_semantic_index, FALSE, + key->clamp_color ); else st_translate_mesa_program(st->ctx, TGSI_PROCESSOR_FRAGMENT, diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 7013993..4e1f4de 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -99,6 +99,7 @@ struct st_fragment_program { struct gl_fragment_program Base; struct glsl_to_tgsi_visitor* glsl_to_tgsi; + struct 
ati_fragment_shader *ati_fs; struct st_fp_variant *variants; }; -- 2.6.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev