Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_fs.h | 8 +++ src/mesa/drivers/dri/i965/brw_fs_emitter.cpp | 86 ++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index a30351d..eaa5332 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -295,6 +295,14 @@ protected: const fs_reg& x, const fs_reg& y); + void emit_linear_to_w_tiling(const fs_reg& t1, + const fs_reg& t2, + const fs_reg& stride, + const fs_reg& src_x, + const fs_reg& src_y, + const fs_reg& dst_x, + const fs_reg& dst_y); + void push_force_uncompressed(); void pop_force_uncompressed(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp b/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp index 22fa33d..0d5cfb4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp @@ -208,6 +208,92 @@ fs_emitter::emit_coord_swizzling(const fs_reg& t1, emit(BRW_OPCODE_ENDIF); } +/** + * Emit translation of coordinates src_x and src_y in linear memory space into + * corresponding coordinates dst_x and dst_y in W-tiled layout. The algorithm + * divides the linear space into W-tiles (64x64), sub-tiles (8x8), + * sub-sub-tiles (4x4) and finally into sub-sub-sub-tiles (2x2). Note that 8x8 + * blocks are laid out in memory in column major order. + * The operation requires two temporary registers (t1, t2) in addition to + * the source and destination. Note also that source and destination + * registers cannot overlap. The x-only forms hold if stride % 64 == 0. 
+ * + * offset = y * stride + x + * tile_y = y / 64 + * offset_x = (y % 64) * stride + x + * tile_x = offset_x / (64 * 64) + * tile_offset = offset % (64 * 64) <==> (offset & 0xfff) + * s_tile_n = tile_offset / 64 <==> (offset & 0xfff) >> 6 + * s_tile_x = s_tile_n / 8 <==> (offset & 0xfff) >> 9 + * s_tile_y = s_tile_n % 8 <==> (offset & 0x1c0) >> 6 + * s_tile_offset = tile_offset % 64 <==> (x & 0x3f) + * s_s_tile_n = s_tile_offset / 16 <==> (x & 0x3f) >> 4 + * s_s_tile_y = s_s_tile_n / 2 <==> (x & 0x3f) >> 5 + * s_s_tile_x = s_s_tile_n % 2 <==> (x & 0x10) >> 4 + * s_s_tile_offset = s_tile_offset % 16 <==> (x & 0x3f) & 0xf + * s_s_s_tile_n = s_s_tile_offset / 4 <==> (x & 0x0f) >> 2 + * s_s_s_tile_y = s_s_s_tile_n / 2 <==> (x & 0x0f) >> 3 + * s_s_s_tile_x = s_s_s_tile_n % 2 <==> (x & 0x04) >> 2 + * s_s_s_tile_offset = s_s_tile_offset % 4 <==> (x & 0x3) + * + * dst_y = tile_y * 64 + dst_y = (y & 0xffc0) + + * s_tile_y * 8 + ((offset & 0x1c0) >> 3) + + * s_s_tile_y * 4 + <==> ((x & 0x20) >> 3) + + * s_s_s_tile_y * 2 + ((x & 0x08) >> 2) + + * s_s_s_tile_offset / 2 ((x & 0x03) >> 1) + * + * dst_x = tile_x * 64 + dst_x = ((((y & 0x3f) * stride + x) & + * 0xf000) / 64) + + * s_tile_x * 8 + ((offset & 0xe00) >> 6) + + * s_s_tile_x * 4 + <==> ((x & 0x10) >> 2) + + * s_s_s_tile_x * 2 + ((x & 0x04) >> 1) + + * s_s_s_tile_offset % 2 (x & 0x1) + */ +void +fs_emitter::emit_linear_to_w_tiling(const fs_reg& t1, + const fs_reg& t2, + const fs_reg& stride, + const fs_reg& src_x, + const fs_reg& src_y, + const fs_reg& dst_x, + const fs_reg& dst_y) +{ + emit(AND(t1, src_y, brw_imm_uw(0x3f))); /* src_y & 0x3f */ + emit(MUL(t1, t1, stride)); /* (src_y & 0x3f) * stride */ + emit(ADD(t1, t1, src_x)); /* (src_y & 0x3f) * stride + src_x */ + emit(AND(t1, t1, brw_imm_uw(0xf000))); /* offset_x & 0xf000; NOTE(review): mask keeps only bits 12-15, confirm offset_x fits in 16 bits */ + emit(SHR(dst_x, t1, brw_imm_uw(6))); /* dst_x = tile_x * 64 */ + emit(MUL(t1, src_y, stride)); /* src_y * stride */ + emit(ADD(t1, t1, src_x)); /* offset */ + emit(AND(t2, t1, brw_imm_uw(0xe00))); /* offset & 0xe00 */ 
+ emit(SHR(t2, t2, brw_imm_uw(6))); /* (offset & 0xe00) >> 6 */ + emit(ADD(dst_x, dst_x, t2)); /* dst_x += ((offset & 0xe00) >> 6) */ + emit(AND(t2, t1, brw_imm_uw(0x1c0))); /* offset & 0x1c0 */ + emit(SHR(dst_y, t2, brw_imm_uw(3))); /* dst_y = (offset & 0x1c0) >> 3 */ + emit(AND(t1, src_x, brw_imm_uw(0x10))); /* src_x & 0x10 */ + emit(SHR(t1, t1, brw_imm_uw(2))); /* (src_x & 0x10) >> 2 */ + emit(ADD(dst_x, dst_x, t1)); /* dst_x += ((src_x & 0x10) >> 2) */ + emit(AND(t1, src_x, brw_imm_uw(0x4))); /* src_x & 0x4 */ + emit(SHR(t1, t1, brw_imm_uw(1))); /* (src_x & 0x4) >> 1 */ + emit(ADD(dst_x, dst_x, t1)); /* dst_x += ((src_x & 0x4) >> 1) */ + emit(AND(t1, src_x, brw_imm_uw(0x1))); /* src_x & 0x1 */ + emit(ADD(dst_x, dst_x, t1)); /* dst_x += (src_x & 0x1) */ + emit(AND(t1, src_y, brw_imm_uw(0xffc0))); /* src_y & 0xffc0 */ + emit(ADD(dst_y, dst_y, t1)); /* dst_y += (src_y & 0xffc0) */ + emit(AND(t1, src_x, brw_imm_uw(0x20))); /* src_x & 0x20 */ + emit(SHR(t1, t1, brw_imm_uw(3))); /* (src_x & 0x20) >> 3 */ + emit(ADD(dst_y, dst_y, t1)); /* dst_y += ((src_x & 0x20) >> 3) */ + emit(AND(t1, src_x, brw_imm_uw(0x8))); /* src_x & 0x8 */ + emit(SHR(t1, t1, brw_imm_uw(2))); /* (src_x & 0x8) >> 2 */ + emit(ADD(dst_y, dst_y, t1)); /* dst_y += ((src_x & 0x8) >> 2) */ + emit(AND(t1, src_x, brw_imm_uw(0x3))); /* src_x & 0x3 */ + emit(SHR(t1, t1, brw_imm_uw(1))); /* (src_x & 0x3) >> 1 */ + emit(ADD(dst_y, dst_y, t1)); /* dst_y += ((src_x & 0x3) >> 1) */ + + if (brw->has_swizzling) + emit_coord_swizzling(t1, dst_x, dst_y); +} + fs_emitter::fs_emitter(struct brw_context *brw, struct brw_wm_compile *c, unsigned dispatch_width) -- 1.8.3.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev