Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 3 +- src/mesa/drivers/dri/i965/brw_fs_stencil_tex.cpp | 92 +++++++++++++++++++++++- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 23 ++++-- 4 files changed, 110 insertions(+), 10 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index e1f540d..eae242b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -185,7 +185,7 @@ public: fs_reg shadow_comp, fs_reg lod, fs_reg lod2); fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, fs_reg shadow_comp, fs_reg lod, fs_reg lod2, - fs_reg sample_index); + fs_reg sample_index, int sampler); fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, fs_reg shadow_comp, fs_reg lod, fs_reg lod2, fs_reg sample_index, fs_reg mcs, int sampler); diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp index ba5514a..f34b2ad 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp @@ -496,7 +496,8 @@ fs_visitor::emit_fragment_program_code() if (brw->gen >= 7) { inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index, fs_reg(0u), fpi->TexSrcUnit); } else if (brw->gen >= 5) { - inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index); + inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy, + sample_index, fpi->TexSrcUnit); } else { inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_stencil_tex.cpp b/src/mesa/drivers/dri/i965/brw_fs_stencil_tex.cpp index 2d813e9..cd746db 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_stencil_tex.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_stencil_tex.cpp @@ -65,6 +65,78 @@ namespace { /** + * Emit instructions transforming pixel coordinates into interleaved (IMS) + * sample coordinates. This is needed when a multisampled surface is textured + * without the hardware support. Unfortunately on generations from five to + * seven the sampling engine does not support W-tiled stencil and part + * of the needed workaround is to access the individual samples manually.
+ * + * Samples are organised as follows: + * + * --------------------------------- ----------------- + * | 0 | 0 | 1 | 1 | 4 | 4 | 5 | 5 | | 0 | 0 | 1 | 1 | + * | 0 | 0 | 1 | 1 | 4 | 4 | 5 | 5 | | 0 | 0 | 1 | 1 | + * | 2 | 2 | 3 | 3 | 6 | 6 | 7 | 7 | | 2 | 2 | 3 | 3 | + * | 2 | 2 | 3 | 3 | 6 | 6 | 7 | 7 | | 2 | 2 | 3 | 3 | + * --------------------------------- ----------------- + * 8X 4X + * + * This allows for the following conversions where S represents the sample + * index: + * + * 4: X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1) + * Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1) + * + * 8: X' = (X & ~0b1) << 2 | (S & 0b100) | (S & 0b1) << 1 | (X & 0b1) + * Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1) + * + * The surface is set up for the sampling engine to return one 2x2 block + * for 4X-case and two 2x2 blocks for 8X-case. The former contains the samples + * for one index while the latter for an index pair - possible pairs are + * (0,1), (2,3), (4,5) and (6,7). + * + * The lowest bits in the coordinates are ignored (set to zero) in order to + * get the top left corner of the block(s). Similarly the lowest bit of the + * sample index is ignored in the 8X case. + * + * The motivation for the arrangement is to not impose any additional + * limitations for the texture sizes. Without it one is forced to supply upscaled + * width and height to the sampling engine thus restricting the size of the + * original texture.
+ */ +static void +emit_encode_ims(fs_emitter *e, const fs_reg& coord, const fs_reg &s, + unsigned num_samples) +{ + fs_reg tmp(e, glsl_type::uint_type); + fs_reg x(coord); + fs_reg y(offset(coord, 1)); + + assert(num_samples == 4 || num_samples == 8); + + e->emit(e->AND(tmp, x, fs_reg(0x1))); /* (X & 0b1) */ + e->emit(e->AND(x, x, fs_reg(0xfffe))); /* (X & ~0b1) */ + e->emit(e->SHL(x, x, fs_reg(num_samples / 4))); /* (X & ~0b1) << 1:2 */ + e->emit(e->OR(x, x, tmp)); /* (X & ~0b1) << 1:2 | (X & 0b1) */ + + if (num_samples == 8) { + e->emit(e->AND(tmp, s, fs_reg(4))); /* S & 0b100 */ + e->emit(e->OR(x, x, tmp)); /* X' = X' | (S & 0b100) */ + } else { + e->emit(e->AND(tmp, s, fs_reg(0x1))); /* (S & 0b1) */ + e->emit(e->SHL(tmp, tmp, fs_reg(0x1))); /* (S & 0b1) << 1 */ + e->emit(e->OR(x, x, tmp)); /* X' = X' | (S & 0b1) << 1 */ + } + + e->emit(e->AND(tmp, y, fs_reg(0x1))); /* (Y & 0b1) */ + e->emit(e->AND(y, y, fs_reg(0xfffe))); /* (Y & ~0b1) */ + e->emit(e->SHL(y, y, fs_reg(1))); /* (Y & ~0b1) << 1 */ + e->emit(e->OR(y, y, tmp)); /* (Y & ~0b1) << 1 | (Y & 0b1) */ + e->emit(e->AND(tmp, s, fs_reg(0x2))); /* (S & 0b10) */ + e->emit(e->OR(y, y, tmp)); /* Y' = Y' | (S & 0b10) */ +} + +/** * Emit translation of pixel coordinates src_x and src_y in W-tiled layout * to corresponding coordinates dst_x and dst_y in Y-tiled layout. * Note that source and destination registers cannot overlap. @@ -377,7 +449,7 @@ fs_stencil_texturing::emit_w_to_y_tiling(struct brw_fragment_program *fp, e->emit(e->AND(y_lowest_bit, offset(*coord, 1), fs_reg(0x1))); if (op == ir_txf_ms) { - assert(!"Multisampled stencil texturing is not supported"); + emit_encode_ims(e, *coord, sample_index, num_samples); } else { offset_to_w_tiled_miplevel(lod_ud); @@ -387,8 +459,11 @@ fs_stencil_texturing::emit_w_to_y_tiling(struct brw_fragment_program *fp, emit_translate_w_to_y_tiling(e, *coord); - /* Modify the pixel coordinates to point to the 2x2 block. 
*/ - e->emit(e->SHR(*coord, *coord, 2)); + /* Modify the pixel coordinates to point to the 2x2 block. In case of + * 8X there are two 2x2 blocks side by side containing samples for one + * 2x2 pixel block. + */ + e->emit(e->SHR(*coord, *coord, fs_reg(num_samples == 8 ? 3 : 2))); } void @@ -397,6 +472,17 @@ fs_stencil_texturing::emit_pick_w_tiled_sample(const fs_reg& samples, { fs_inst *inst; + if (num_samples == 8) { + /* Choose between the pair of indices (0,1), (2,3), (4,5) or (6,7). In + * case the latter index of the pair is needed simply move the green to the red channel. + */ + e->emit(e->AND(sample_index, sample_index, fs_reg(1))); + inst = e->emit(BRW_OPCODE_CMP, reg_null_f, sample_index, fs_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst = e->emit(e->MOV(samples, offset(samples, 1))); + inst->predicate = BRW_PREDICATE_NORMAL; + } + inst = e->emit(BRW_OPCODE_CMP, reg_null_f, y_lowest_bit, fs_reg(0)); inst->conditional_mod = BRW_CONDITIONAL_NZ; inst = e->emit(e->SHR(samples, samples, fs_reg(16))); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index f6bb010..30a3b84 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1125,6 +1125,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, return inst; } +static enum ir_texture_opcode +resolve_texture_op(const struct brw_sampler_prog_key_data *key, int sampler, + enum ir_texture_opcode op) +{ + /* In case of stencil, surfaces are configured as single sampled and + * coordinates are translated for texel fetches. + */ + if (key->num_w_tiled_samples[sampler]) + return ir_txf; + + return op; +} + /* gen5's sampler has slots for u, v, r, array index, then optional * parameters like shadow comparitor or LOD bias.
If optional * parameters aren't present, those base slots are optional and don't @@ -1136,7 +1149,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, fs_inst * fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, fs_reg shadow_c, fs_reg lod, fs_reg lod2, - fs_reg sample_index) + fs_reg sample_index, int sampler) { int mlen = 0; int base_mrf = 2; @@ -1169,7 +1182,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, } fs_inst *inst = NULL; - switch (ir->op) { + switch (resolve_texture_op(&key->tex, sampler, ir->op)) { case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break; @@ -1295,7 +1308,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, bool coordinate_done = false; /* Set up the LOD info */ - switch (ir->op) { + switch (resolve_texture_op(&key->tex, sampler, ir->op)) { case ir_tex: case ir_lod: break; @@ -1422,7 +1435,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* Generate the SEND */ fs_inst *inst = NULL; - switch (ir->op) { + switch (resolve_texture_op(&key->tex, sampler, ir->op)) { case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst, payload); break; case ir_txb: inst = emit(FS_OPCODE_TXB, dst, payload); break; case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break; @@ -1712,7 +1725,7 @@ fs_visitor::visit(ir_texture *ir) lod, lod2, sample_index, mcs, sampler); } else if (brw->gen >= 5) { inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor, - lod, lod2, sample_index); + lod, lod2, sample_index, sampler); } else { inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor, lod, lod2); -- 1.8.3.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev