-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 On 07/18/2011 12:55 AM, Chad Versace wrote: > Until now, the stencil buffer was allocated as a Y tiled buffer, because > in several locations the PRM states that it is. However, it is actually > W tiled. From the PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section > 4.5.2.1 W-Major Format: > W-Major Tile Format is used for separate stencil. > > The GTT is incapable of W fencing, so we allocate the stencil buffer with > I915_TILING_NONE and decode the tile's layout in software. > > This fix touches the following portions of code: > - In intel_allocate_renderbuffer_storage(), allocate the stencil > buffer with I915_TILING_NONE. > - In intel_verify_dri2_has_hiz(), verify that the stencil buffer is > not tiled. > - In the stencil buffer's span functions, the tile's layout must be > decoded in software. > > This commit mutually depends on the xf86-video-intel commit > dri: Do not tile stencil buffer > Author: Chad Versace <c...@chad-versace.us> > Date: Mon Jul 18 00:38:00 2011 -0700 > > On Gen6 with separate stencil enabled, fixes the following Piglit tests: > bugs/fdo23670-drawpix_stencil > general/stencil-drawpixels > spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-copypixels > spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-drawpixels > spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-readpixels > spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-copypixels > spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-drawpixels > spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-readpixels > spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-copypixels > spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-drawpixels > spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-readpixels > spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-copypixels > spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-drawpixels > spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-readpixels > spec/EXT_packed_depth_stencil/fbo-stencil-GL_DEPTH24_STENCIL8-copypixels > spec/EXT_packed_depth_stencil/fbo-stencil-GL_DEPTH24_STENCIL8-readpixels > spec/EXT_packed_depth_stencil/readpixels-24_8 > > Note: This is a candidate for the 7.11 branch. > > CC: Eric Anholt <e...@anholt.net> > CC: Kenneth Graunke <kenn...@whitecape.org> > Signed-off-by: Chad Versace <c...@chad-versace.us> > --- > src/mesa/drivers/dri/intel/intel_clear.c | 6 ++ > src/mesa/drivers/dri/intel/intel_context.c | 9 ++- > src/mesa/drivers/dri/intel/intel_fbo.c | 13 +++-- > src/mesa/drivers/dri/intel/intel_screen.h | 9 ++- > src/mesa/drivers/dri/intel/intel_span.c | 85 > ++++++++++++++++++++-------- > 5 files changed, 89 insertions(+), 33 deletions(-) > > diff --git a/src/mesa/drivers/dri/intel/intel_clear.c > b/src/mesa/drivers/dri/intel/intel_clear.c > index dfca03c..5ab9873 100644 > --- a/src/mesa/drivers/dri/intel/intel_clear.c > +++ b/src/mesa/drivers/dri/intel/intel_clear.c > @@ -143,6 +143,12 @@ intelClear(struct gl_context *ctx, GLbitfield mask) > */ > tri_mask |= BUFFER_BIT_STENCIL; > } > + else if (intel->has_separate_stencil && > + stencilRegion->tiling == I915_TILING_NONE) { > + /* The stencil buffer is actually W tiled, which the hardware > + * cannot blit to. */ > + tri_mask |= BUFFER_BIT_STENCIL; > + } > else { > /* clearing all stencil bits, use blitting */ > blit_mask |= BUFFER_BIT_STENCIL; > diff --git a/src/mesa/drivers/dri/intel/intel_context.c > b/src/mesa/drivers/dri/intel/intel_context.c > index 2ba1363..fe8be08 100644 > --- a/src/mesa/drivers/dri/intel/intel_context.c > +++ b/src/mesa/drivers/dri/intel/intel_context.c > @@ -1439,7 +1439,12 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, > assert(stencil_rb->Base.Format == MESA_FORMAT_S8); > assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24); > > - if (stencil_rb->region->tiling == I915_TILING_Y) { > + if (stencil_rb->region->tiling == I915_TILING_NONE) { > + /* > + * The stencil buffer is actually W tiled. The region's tiling is > + * I915_TILING_NONE, however, because the GTT is incapable of W > + * fencing. > + */ > intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE; > return; > } else { > @@ -1527,7 +1532,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, > * Presently, however, no verification or clean up is necessary, and > * execution should not reach here. If the framebuffer still has a hiz > * region, then we have already set dri2_has_hiz to true after > - * confirming above that the stencil buffer is Y tiled. > + * confirming above that the stencil buffer is W tiled. > */ > assert(0); > } > diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c > b/src/mesa/drivers/dri/intel/intel_fbo.c > index 1669af2..507cc33 100644 > --- a/src/mesa/drivers/dri/intel/intel_fbo.c > +++ b/src/mesa/drivers/dri/intel/intel_fbo.c > @@ -173,6 +173,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, > struct gl_renderbuffer > > if (irb->Base.Format == MESA_FORMAT_S8) { > /* > + * The stencil buffer is W tiled. However, we request from the kernel a > + * non-tiled buffer because the GTT is incapable of W fencing. > + * > * The stencil buffer has quirky pitch requirements. From Vol 2a, > * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": > * The pitch must be set to 2x the value computed based on width, as > @@ -180,14 +183,14 @@ intel_alloc_renderbuffer_storage(struct gl_context * > ctx, struct gl_renderbuffer > * To accomplish this, we resort to the nasty hack of doubling the drm > * region's cpp and halving its height. > * > - * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt() > - * maps the memory incorrectly. > + * If we neglect to double the pitch, then render corruption occurs. > */ > irb->region = intel_region_alloc(intel->intelScreen, > - I915_TILING_Y, > + I915_TILING_NONE, > cpp * 2, > - width, > - height / 2, > + ALIGN(width, 64), > + /* XXX: Maybe align to 128? */ > + ALIGN(height / 2, 64), > GL_TRUE); > if (!irb->region) > return false; > diff --git a/src/mesa/drivers/dri/intel/intel_screen.h > b/src/mesa/drivers/dri/intel/intel_screen.h > index b2013af..9dd6a52 100644 > --- a/src/mesa/drivers/dri/intel/intel_screen.h > +++ b/src/mesa/drivers/dri/intel/intel_screen.h > @@ -63,9 +63,12 @@ > * x8_z24 and s8). > * > * Eventually, intel_update_renderbuffers() makes a DRI2 request for > - * DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled, > - * then we joyfully set intel_screen.dri2_has_hiz to true and continue as if > - * nothing happend. > + * DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is > + * I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to > + * true and continue as if nothing happend. > + * > + * [1] The stencil buffer is actually W tiled. However, we request from the > + * kernel a non-tiled buffer because the GTT is incapable of W fencing. > * > * If the buffers are X tiled, however, the handshake has failed and we must > * clean up. > diff --git a/src/mesa/drivers/dri/intel/intel_span.c > b/src/mesa/drivers/dri/intel/intel_span.c > index 153803f..d306432 100644 > --- a/src/mesa/drivers/dri/intel/intel_span.c > +++ b/src/mesa/drivers/dri/intel/intel_span.c > @@ -131,38 +131,77 @@ intel_set_span_functions(struct intel_context *intel, > int miny = 0; \ > int maxx = rb->Width; \ > int maxy = rb->Height; \ > - int stride = rb->RowStride; > \ > - uint8_t *buf = rb->Data; \ > + \ > + /* > \ > + * Here we ignore rb->Data and rb->RowStride as set by \ > + * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile \ > + * manually, the region's *real* base address and stride is > \ > + * required. > \ > + */ > \ > + struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ > + uint8_t *buf = irb->region->buffer->virtual; > \ > + unsigned stride = irb->region->pitch; \ > + unsigned height = 2 * irb->region->height; > \ > + bool flip = rb->Name == 0; > \ > > -/* Don't flip y. */ > #undef Y_FLIP > -#define Y_FLIP(y) y > +#define Y_FLIP(y) ((1 - flip) * y + flip * (height - 1 - y)) > > /** > * \brief Get pointer offset into stencil buffer. > * > - * The stencil buffer interleaves two rows into one. Yay for crazy hardware. > - * The table below demonstrates how the pointer arithmetic behaves for a > buffer > - * with positive stride (s=stride). > - * > - * x | y | byte offset > - * -------------------------- > - * 0 | 0 | 0 > - * 0 | 1 | 1 > - * 1 | 0 | 2 > - * 1 | 1 | 3 > - * ... | ... | ... > - * 0 | 2 | s > - * 0 | 3 | s + 1 > - * 1 | 2 | s + 2 > - * 1 | 3 | s + 3 > - * > + * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we > + * must decode the tile's layout in software. > * > + * See > + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile > + * Format. > + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling > Algorithm > */ > -static inline intptr_t > -intel_offset_S8(int stride, GLint x, GLint y) > +static inline uintptr_t
Why the type change? > +intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y) > { > - return 2 * ((y / 2) * stride + x) + y % 2; > + uint32_t tile_size = 4096; > + uint32_t tile_width = 64; > + uint32_t tile_height = 64; > + uint32_t row_size = 64 * stride; > + > + uint32_t tile_x = x / tile_width; > + uint32_t tile_y = y / tile_height; > + > + /* The byte's address relative to the tile's base addres. */ > + uint32_t byte_x = x % tile_width; > + uint32_t byte_y = y % tile_height; > + > + uintptr_t u = tile_y * row_size > + + tile_x * tile_size > + + 512 * (byte_x / 8) > + + 64 * (byte_y / 8) > + + 32 * ((byte_y / 4) % 2) > + + 16 * ((byte_x / 4) % 2) > + + 8 * ((byte_y / 2) % 2) > + + 4 * ((byte_x / 2) % 2) > + + 2 * (byte_y % 2) > + + 1 * (byte_x % 2); > + > + /* > + * Errata for Gen5: > + * > + * An additional offset is needed which is not documented in the PRM. > + * > + * if ((byte_x / 8) % 2 == 1) { > + * if ((byte_y / 8) % 2) == 0) { > + * u += 64; > + * } else { > + * u -= 64; > + * } > + * } > + * > + * The offset is expressed more tersely as > + * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1)); > + */ > + > + return u; > } > > #define WRITE_STENCIL(x, y, src) buf[intel_offset_S8(stride, x, y)] = src; -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/ iEYEARECAAYFAk4kgDUACgkQX1gOwKyEAw+9WgCffPOqrYW9zIZ0114HTmwcXfCP t7sAnA6cpwuNkL+o6/yc20DI27xzSopm =66S9 -----END PGP SIGNATURE----- _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev