Module: Mesa Branch: master Commit: 5289276bd4faedb67b3c42e6572fb3e0374aa6b6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5289276bd4faedb67b3c42e6572fb3e0374aa6b6
Author: Tiziano Bacocco <[email protected]> Date: Fri Jan 30 20:10:38 2015 +0100 st/nine: Implement TEXBEM,TEXBEML and BEM Signed-off-by: Tiziano Bacocco <[email protected]> --- src/gallium/state_trackers/nine/device9.c | 4 +- src/gallium/state_trackers/nine/nine_shader.c | 122 ++++++++++++++++++++++-- src/gallium/state_trackers/nine/nine_shader.h | 1 + src/gallium/state_trackers/nine/nine_state.c | 27 ++++++ src/gallium/state_trackers/nine/nine_state.h | 1 + src/gallium/state_trackers/nine/pixelshader9.c | 1 + src/gallium/state_trackers/nine/pixelshader9.h | 1 + 7 files changed, 149 insertions(+), 8 deletions(-) diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 96061e0..feb0b8a 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -320,8 +320,9 @@ NineDevice9_ctor( struct NineDevice9 *This, This->state.vs_const_f = CALLOC(This->vs_const_size, 1); This->state.ps_const_f = CALLOC(This->ps_const_size, 1); This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1); + This->state.ps_bumpenvmap_temp = CALLOC(This->ps_const_size,1); if (!This->state.vs_const_f || !This->state.ps_const_f || - !This->state.vs_lconstf_temp) + !This->state.vs_lconstf_temp || !This->state.ps_bumpenvmap_temp) return E_OUTOFMEMORY; if (strstr(pScreen->get_name(pScreen), "AMD") || @@ -442,6 +443,7 @@ NineDevice9_dtor( struct NineDevice9 *This ) FREE(This->state.vs_const_f); FREE(This->state.ps_const_f); FREE(This->state.vs_lconstf_temp); + FREE(This->state.ps_bumpenvmap_temp); if (This->swapchains) { for (i = 0; i < This->nswapchains; ++i) diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 8ce3530..3f670ab 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -2145,12 +2145,78 @@ DECL_SPECIAL(TEXKILL) DECL_SPECIAL(TEXBEM) { - STUB(D3DERR_INVALIDCALL); -} + struct ureg_program *ureg = tx->ureg; + struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); + struct ureg_dst tmp; + struct ureg_dst tmp2; + struct ureg_src sample; + struct ureg_src m00; + struct ureg_src m01; + struct ureg_src m10; + struct ureg_src m11; + struct ureg_src bumpenvlscale; + struct ureg_src bumpenvloffset; + const int m = tx->insn.dst[0].idx; + const int n = tx->insn.src[0].idx; -DECL_SPECIAL(TEXBEML) -{ - STUB(D3DERR_INVALIDCALL); + assert(tx->version.major == 1); + + sample = ureg_DECL_sampler(ureg, m); + tx->info->sampler_mask |= 1 << m; + + tx_texcoord_alloc(tx, m); + + tmp = tx_scratch(tx); + tmp2 = tx_scratch(tx); + /* + * Bump-env-matrix: + * 00 is X + * 01 is Y + * 10 is Z + * 11 is W + */ + m00 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + m), NINE_SWIZZLE4(X, X, X, X)); + m01 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + m), NINE_SWIZZLE4(Y, Y, Y, Y)); + m10 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + m), NINE_SWIZZLE4(Z, Z, Z, Z)); + m11 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + m), NINE_SWIZZLE4(W, W, W, W)); + + /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */ + if (m % 2 == 0) { + bumpenvlscale = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + 8 + m / 2), NINE_SWIZZLE4(X, X, X, X)); + bumpenvloffset = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + 8 + m / 2), NINE_SWIZZLE4(Y, Y, Y, Y)); + } else { + bumpenvlscale = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + 8 + m / 2), NINE_SWIZZLE4(Z, Z, Z, Z)); + bumpenvloffset = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + 8 + m / 2), NINE_SWIZZLE4(W, W, W, W)); + } + + /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(X, X, X, X)), tx->regs.vT[m]); + /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), + m10, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y, Y, Y, Y)), ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(X, X, X, X))); + + /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(X, X, X, X)), tx->regs.vT[m]); + /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), + m11, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y, Y, Y, Y)), ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Y, Y, Y, Y))); + + /* Now the texture coordinates are in tmp.xy */ + + if (tx->insn.opcode == D3DSIO_TEXBEM) { + ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); + } else if (tx->insn.opcode == D3DSIO_TEXBEML) { + /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */ + ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); + ureg_MAD(ureg, tmp2, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Z, Z, Z, Z)), bumpenvlscale, bumpenvloffset); + ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2)); + } + + + + tx->info->bumpenvmat_needed = 1; + + return D3D_OK; } DECL_SPECIAL(TEXREG2AR) @@ -2429,7 +2495,42 @@ DECL_SPECIAL(TEXDEPTH) DECL_SPECIAL(BEM) { - STUB(D3DERR_INVALIDCALL); + struct ureg_program *ureg = tx->ureg; + struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); + struct ureg_dst tmp; + struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); + struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); + struct ureg_src m00; + struct ureg_src m01; + struct ureg_src m10; + struct ureg_src m11; + /* + * Bump-env-matrix: + * 00 is X + * 01 is Y + * 10 is Z + * 11 is W + */ + m00 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + tx->insn.dst[0].idx), NINE_SWIZZLE4(X, X, X, X)); + m01 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + tx->insn.dst[0].idx), NINE_SWIZZLE4(Y, Y, Y, Y)); + m10 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + tx->insn.dst[0].idx), NINE_SWIZZLE4(Z, Z, Z, Z)); + m11 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + tx->insn.dst[0].idx), NINE_SWIZZLE4(W, W, W, W)); + /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, ureg_swizzle(src1, NINE_SWIZZLE4(X, X, X, X)), ureg_swizzle(src0, NINE_SWIZZLE4(X, X, X, X))); + /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), + m10, ureg_swizzle(src1, NINE_SWIZZLE4(Y, Y, Y, Y)), ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(X, X, X, X))); + + /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, ureg_swizzle(src1, NINE_SWIZZLE4(X, X, X, X)), src0); + /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), + m11, ureg_swizzle(src1, NINE_SWIZZLE4(Y, Y, Y, Y)), ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Y, Y, Y, Y))); + ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp)); + + tx->info->bumpenvmat_needed = 1; + + return D3D_OK; } DECL_SPECIAL(TEXLD) @@ -2624,7 +2725,7 @@ struct sm1_op_info inst_table[] = _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)), _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)), _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), - _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)), + _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)), _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)), _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)), @@ -3031,6 +3132,8 @@ tx_ctor(struct shader_translator *tx, struct nine_shader_info *info) info->lconstf.data = NULL; info->lconstf.ranges = NULL; + info->bumpenvmat_needed = 0; + for (i = 0; i < Elements(tx->regs.rL); ++i) { tx->regs.rL[i] = ureg_dst_undef(); } @@ -3239,6 +3342,11 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) info->const_int_slots > 0 ? device->max_vs_const_f + info->const_int_slots : info->const_float_slots; + assert(IS_VS || tx->version.major > 1 || slot_max <= 8); + + if (info->bumpenvmat_needed) + slot_max = 8 + 8 + 4; /* 8 for ps1_x + 8 for texbem + 4 for texbeml*/ + info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */ for (s = 0; s < slot_max; s++) diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h index 56c5d99..54049d2 100644 --- a/src/gallium/state_trackers/nine/nine_shader.h +++ b/src/gallium/state_trackers/nine/nine_shader.h @@ -68,6 +68,7 @@ struct nine_shader_info unsigned const_bool_slots; struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */ + uint8_t bumpenvmat_needed; }; static INLINE void diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 495cc86..207f5e7 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -566,6 +566,7 @@ update_ps_constants_userbuf(struct NineDevice9 *device) struct nine_state *state = &device->state; struct pipe_context *pipe = device->pipe; struct pipe_constant_buffer cb; + int i; cb.buffer = NULL; cb.buffer_offset = 0; cb.buffer_size = device->state.ps->const_used_size; @@ -586,6 +587,32 @@ update_ps_constants_userbuf(struct NineDevice9 *device) state->changed.ps_const_b = 0; } + /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */ + if (device->state.ps->byte_code.version >> 4 == 1 && device->state.ps->bumpenvmat_needed) { /* Version.major = 1 */ + + memcpy(device->state.ps_bumpenvmap_temp, cb.user_buffer, cb.buffer_size); + + /* Set the bump env matrix */ + for (i = 0; i < 8; i++) { + /* 4floats*maxps1xconst+4floats*texstage+matpart */ + /* The matrix as comments on wine visual.c test say, is transposed */ + device->state.ps_bumpenvmap_temp[4 * 8 + 4 * i + 0] = *((float *)&device->state.ff.tex_stage[i][D3DTSS_BUMPENVMAT00]); + device->state.ps_bumpenvmap_temp[4 * 8 + 4 * i + 1] = *((float *)&device->state.ff.tex_stage[i][D3DTSS_BUMPENVMAT10]); + device->state.ps_bumpenvmap_temp[4 * 8 + 4 * i + 2] = *((float *)&device->state.ff.tex_stage[i][D3DTSS_BUMPENVMAT01]); + device->state.ps_bumpenvmap_temp[4 * 8 + 4 * i + 3] = *((float *)&device->state.ff.tex_stage[i][D3DTSS_BUMPENVMAT11]); + } + + /* Set the bumpenvl parameters */ + for (i = 0; i < 4; i++) { + device->state.ps_bumpenvmap_temp[4 * 8 + 4 * 8 + i * 4 + 0] = *((float *)&device->state.ff.tex_stage[i * 2 + 0][D3DTSS_BUMPENVLSCALE]); + device->state.ps_bumpenvmap_temp[4 * 8 + 4 * 8 + i * 4 + 1] = *((float *)&device->state.ff.tex_stage[i * 2 + 0][D3DTSS_BUMPENVLOFFSET]); + device->state.ps_bumpenvmap_temp[4 * 8 + 4 * 8 + i * 4 + 2] = *((float *)&device->state.ff.tex_stage[i * 2 + 1][D3DTSS_BUMPENVLSCALE]); + device->state.ps_bumpenvmap_temp[4 * 8 + 4 * 8 + i * 4 + 3] = *((float *)&device->state.ff.tex_stage[i * 2 + 1][D3DTSS_BUMPENVLOFFSET]); + } + + cb.user_buffer = device->state.ps_bumpenvmap_temp; + } + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb); if (device->state.changed.ps_const_f) { diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index 1916959..e0041f8 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -143,6 +143,7 @@ struct nine_state int vs_const_i[NINE_MAX_CONST_I][4]; BOOL vs_const_b[NINE_MAX_CONST_B]; float *vs_lconstf_temp; + float *ps_bumpenvmap_temp; uint32_t vs_key; struct NinePixelShader9 *ps; diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c index 3f176a3..010c4f4 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.c +++ b/src/gallium/state_trackers/nine/pixelshader9.c @@ -72,6 +72,7 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This, This->sampler_mask = info.sampler_mask; This->rt_mask = info.rt_mask; This->const_used_size = info.const_used_size; + This->bumpenvmat_needed = info.bumpenvmat_needed; /* no constant relative addressing for ps */ assert(info.lconstf.data == NULL); assert(info.lconstf.ranges == NULL); diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h index 5e2219c..32be0cd 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.h +++ b/src/gallium/state_trackers/nine/pixelshader9.h @@ -41,6 +41,7 @@ struct NinePixelShader9 unsigned const_used_size; /* in bytes */ + uint8_t bumpenvmat_needed; uint16_t sampler_mask; uint16_t sampler_mask_shadow; uint8_t rt_mask; _______________________________________________ mesa-commit mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-commit
