A big limitation of the current direct (tiled) memcpy routine is that it only recognises a couple of (admittedly common) colour formats, and cannot do any inline conversion. If we pass the mesa_format down to the memcpy selection (intel_get_memcpy()) and also tell it the direction of the transfer, we can start accepting a few mixed-format transfers (for example, RGBX client data uploaded into a BGRA texture) and be less picky overall.
The principal benefit code-wise is that this pushes the decision about handled formats from the multiple callers to the tiled-memcpy backend. Cc: Matt Turner <matts...@gmail.com> Cc: Kenneth Graunke <kenn...@whitecape.org> --- src/mesa/drivers/dri/i965/intel_pixel_read.c | 26 +--- src/mesa/drivers/dri/i965/intel_tex_image.c | 47 +++---- src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 185 +++++++++++++++++-------- src/mesa/drivers/dri/i965/intel_tiled_memcpy.h | 17 ++- 4 files changed, 167 insertions(+), 108 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index 4528d6d265..9cb48e5ed2 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -82,20 +82,12 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, return false; struct intel_renderbuffer *irb = intel_renderbuffer(rb); - int dst_pitch; - - /* The miptree's buffer. */ - struct brw_bo *bo; - - uint32_t cpp; - mem_copy_fn mem_copy = NULL; /* This fastpath is restricted to specific renderbuffer types: * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support * more types. */ if (!devinfo->has_llc || - !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) || pixels == NULL || _mesa_is_bufferobj(pack->BufferObj) || pack->Alignment > 4 || @@ -118,15 +110,9 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, if (rb->NumSamples > 1) return false; - /* We can't handle copying from RGBX or BGRX because the tiled_memcpy - * function doesn't set the last channel to 1. Note this checks BaseFormat - * rather than TexFormat in case the RGBX format is being simulated with an - * RGBA format. 
- */ - if (rb->_BaseFormat == GL_RGB) - return false; - - if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp)) + mem_copy_fn mem_copy = + intel_get_memcpy(rb->Format, format, type, INTEL_DOWNLOAD); + if (mem_copy == NULL) return false; if (!irb->mt || @@ -152,7 +138,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, */ intel_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, false); - bo = irb->mt->bo; + struct brw_bo *bo = irb->mt->bo; if (brw_batch_references(&brw->batch, bo)) { perf_debug("Flushing before mapping a referenced bo.\n"); @@ -171,7 +157,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, xoffset += slice_offset_x; yoffset += slice_offset_y; - dst_pitch = _mesa_image_row_stride(pack, width, format, type); + int dst_pitch = _mesa_image_row_stride(pack, width, format, type); /* For a window-system renderbuffer, the buffer is actually flipped * vertically, so we need to handle that. Since the detiling function @@ -199,6 +185,8 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, pack->Alignment, pack->RowLength, pack->SkipPixels, pack->SkipRows); + uint32_t cpp = _mesa_get_format_bytes(rb->Format); + tiled_to_linear( xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index 37c8e24f03..1039d80bf3 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -187,13 +187,6 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, struct brw_context *brw = brw_context(ctx); const struct gen_device_info *devinfo = &brw->screen->devinfo; struct intel_texture_image *image = intel_texture_image(texImage); - int src_pitch; - - /* The miptree's buffer. */ - struct brw_bo *bo; - - uint32_t cpp; - mem_copy_fn mem_copy = NULL; /* This fastpath is restricted to specific texture types: * a 2D BGRA, RGBA, L8 or A8 texture. 
It could be generalized to support @@ -205,7 +198,6 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, * we need tests. */ if (!devinfo->has_llc || - !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) || !(texImage->TexObject->Target == GL_TEXTURE_2D || texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) || pixels == NULL || @@ -223,7 +215,12 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, if (ctx->_ImageTransferState) return false; - if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp)) + if (format == GL_COLOR_INDEX) + return false; + + mem_copy_fn mem_copy = + intel_get_memcpy(texImage->TexFormat, format, type, INTEL_UPLOAD); + if (mem_copy == NULL) return false; /* If this is a nontrivial texture view, let another path handle it instead. */ @@ -258,7 +255,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, intel_miptree_access_raw(brw, image->mt, level, 0, true); - bo = image->mt->bo; + struct brw_bo *bo = image->mt->bo; if (brw_batch_references(&brw->batch, bo)) { perf_debug("Flushing before mapping a referenced bo.\n"); @@ -271,7 +268,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, return false; } - src_pitch = _mesa_image_row_stride(packing, width, format, type); + int src_pitch = _mesa_image_row_stride(packing, width, format, type); /* We postponed printing this message until having committed to executing * the function. @@ -290,6 +287,8 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, xoffset += level_x; yoffset += level_y; + uint32_t cpp = _mesa_get_format_bytes(texImage->TexFormat); + linear_to_tiled( xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, @@ -627,13 +626,6 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); const struct gen_device_info *devinfo = &brw->screen->devinfo; struct intel_texture_image *image = intel_texture_image(texImage); - int dst_pitch; - - /* The miptree's buffer. 
*/ - struct brw_bo *bo; - - uint32_t cpp; - mem_copy_fn mem_copy = NULL; /* This fastpath is restricted to specific texture types: * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support @@ -645,7 +637,6 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, * we need tests. */ if (!devinfo->has_llc || - !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) || !(texImage->TexObject->Target == GL_TEXTURE_2D || texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) || pixels == NULL || @@ -659,15 +650,9 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, packing->Invert) return false; - /* We can't handle copying from RGBX or BGRX because the tiled_memcpy - * function doesn't set the last channel to 1. Note this checks BaseFormat - * rather than TexFormat in case the RGBX format is being simulated with an - * RGBA format. - */ - if (texImage->_BaseFormat == GL_RGB) - return false; - - if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp)) + mem_copy_fn mem_copy = + intel_get_memcpy(texImage->TexFormat, format, type, INTEL_DOWNLOAD); + if (mem_copy == NULL) return false; /* If this is a nontrivial texture view, let another path handle it instead. 
*/ @@ -702,7 +687,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, intel_miptree_access_raw(brw, image->mt, level, 0, true); - bo = image->mt->bo; + struct brw_bo *bo = image->mt->bo; if (brw_batch_references(&brw->batch, bo)) { perf_debug("Flushing before mapping a referenced bo.\n"); @@ -715,7 +700,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, return false; } - dst_pitch = _mesa_image_row_stride(packing, width, format, type); + int dst_pitch = _mesa_image_row_stride(packing, width, format, type); DBG("%s: level=%d x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x " "mesa_format=0x%x tiling=%d " @@ -731,6 +716,8 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, xoffset += level_x; yoffset += level_y; + uint32_t cpp = _mesa_get_format_bytes(texImage->TexFormat); + tiled_to_linear( xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c index 53a5679691..dff6976bdd 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c @@ -33,6 +33,11 @@ #include "util/macros.h" +#include "main/glheader.h" +#include "main/enums.h" +#include "main/mtypes.h" +#include "main/glformats.h" + #include "brw_context.h" #include "intel_tiled_memcpy.h" @@ -213,6 +218,37 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes) return dst; } +static inline void * +rgbx8(void *dst, const void *src, size_t bytes) +{ + uint8_t *d = dst; + uint8_t const *s = src; + + while (bytes >= 4) { + d[0] = s[2]; + d[1] = s[1]; + d[2] = s[0]; + d[3] = 0xff; + d += 4; + s += 4; + bytes -= 4; + } + return dst; +} + +static inline void * +bgrx8(void *dst, const void *src, size_t bytes) +{ + uint32_t *d = dst; + uint32_t const *s = src; + + while (bytes >= 4) { + *d++ = *s++ | 0xff000000; + bytes -= 4; + } + return dst; +} + /** * Each row from y0 to y1 is copied in three parts: [x0,x1), 
[x1,x2), [x2,x3). * These ranges are in bytes, i.e. pixels * bytes-per-pixel. @@ -473,7 +509,9 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, src_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_dst); else - unreachable("not reached"); + return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, src_pitch, swizzle_bit, + mem_copy, memcpy); } else { if (mem_copy == memcpy) return linear_to_xtiled(x0, x1, x2, x3, y0, y1, @@ -484,10 +522,11 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, src_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_dst); else - unreachable("not reached"); + return linear_to_xtiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, + mem_copy, mem_copy); } - linear_to_xtiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); + unreachable("not reached"); } /** @@ -516,7 +555,9 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, src_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_dst); else - unreachable("not reached"); + return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, src_pitch, swizzle_bit, + mem_copy, mem_copy); } else { if (mem_copy == memcpy) return linear_to_ytiled(x0, x1, x2, x3, y0, y1, @@ -526,10 +567,11 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, src_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_dst); else - unreachable("not reached"); + return linear_to_ytiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, + mem_copy, mem_copy); } - linear_to_ytiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); + unreachable("not reached"); } /** @@ -558,7 +600,9 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); else - 
unreachable("not reached"); + return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, dst_pitch, swizzle_bit, + mem_copy, mem_copy); } else { if (mem_copy == memcpy) return xtiled_to_linear(x0, x1, x2, x3, y0, y1, @@ -568,10 +612,11 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); else - unreachable("not reached"); + return xtiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + mem_copy, mem_copy); } - xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); + unreachable("not reached"); } /** @@ -600,7 +645,9 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); else - unreachable("not reached"); + return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, dst_pitch, swizzle_bit, + mem_copy, mem_copy); } else { if (mem_copy == memcpy) return ytiled_to_linear(x0, x1, x2, x3, y0, y1, @@ -610,10 +657,11 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); else - unreachable("not reached"); + return ytiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + mem_copy, mem_copy); } - ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); + unreachable("not reached"); } /** @@ -812,51 +860,74 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2, * \param[in] tiledFormat The format of the tiled image * \param[in] format The GL format of the client data * \param[in] type The GL type of the client data - * \param[out] mem_copy Will be set to one of either the standard - * library's memcpy or a different copy function - * that performs an RGBA to BGRA conversion - * \param[out] cpp Number of bytes per channel * - 
* \return true if the format and type combination are valid + * \return the mem_copy_fn if the format and type combination are valid */ -bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, - GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp) +mem_copy_fn intel_get_memcpy(mesa_format tiledFormat, + GLenum format, GLenum type, + enum intel_memcpy_direction direction) { - if (type == GL_UNSIGNED_INT_8_8_8_8_REV && - !(format == GL_RGBA || format == GL_BGRA)) - return false; /* Invalid type/format combination */ - - if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) || - (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) { - *cpp = 1; - *mem_copy = memcpy; - } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - *mem_copy = memcpy; - } else if (format == GL_RGBA) { - *mem_copy = rgba8_copy; - } - } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can - * use the same function. 
- */ - *mem_copy = rgba8_copy; - } else if (format == GL_RGBA) { - *mem_copy = memcpy; - } + if (type == GL_BITMAP) + return NULL; + + /* Stencil tiling is a lie, though we could do similar manual detiling */ + switch ((int)tiledFormat) { + case MESA_FORMAT_S_UINT8: + case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: + case MESA_FORMAT_Z24_UNORM_X8_UINT: + case MESA_FORMAT_Z24_UNORM_S8_UINT: + return NULL; } - if (!(*mem_copy)) - return false; + if (_mesa_is_format_compressed(tiledFormat)) + return NULL; + + mesa_format user_format = _mesa_format_from_format_and_type(format, type); + if (_mesa_format_is_mesa_array_format(user_format)) + user_format = _mesa_format_from_array_format(user_format); + + mem_copy_fn fn = NULL; + + if (user_format == tiledFormat) { + /* Prevent any implicit conversions */ + if (_mesa_unpack_format_to_base_format(format) != + _mesa_get_format_base_format(tiledFormat)) + fn = memcpy; + } else switch ((int)tiledFormat) { + case MESA_FORMAT_B8G8R8A8_UNORM: + if (user_format == MESA_FORMAT_R8G8B8A8_UNORM) + fn = rgba8_copy; + else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM) + fn = direction == INTEL_UPLOAD ? rgbx8 : memcpy; + else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM) + fn = direction == INTEL_UPLOAD ? bgrx8 : memcpy; + break; + case MESA_FORMAT_B8G8R8X8_UNORM: + if (user_format == MESA_FORMAT_B8G8R8A8_UNORM) + fn = direction == INTEL_UPLOAD ? memcpy : bgrx8; + else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM) + fn = direction == INTEL_UPLOAD ? rgba8_copy : bgrx8; + else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM) + fn = rgba8_copy; + break; + + case MESA_FORMAT_R8G8B8A8_UNORM: + if (user_format == MESA_FORMAT_B8G8R8A8_UNORM) + fn = rgba8_copy; + else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM) + fn = direction == INTEL_UPLOAD ? rgbx8 : memcpy; + else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM) + fn = direction == INTEL_UPLOAD ? 
bgrx8 : memcpy; + break; + case MESA_FORMAT_R8G8B8X8_UNORM: + if (user_format == MESA_FORMAT_R8G8B8A8_UNORM) + fn = direction == INTEL_UPLOAD ? memcpy : bgrx8; + else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM) + fn = direction == INTEL_UPLOAD ? rgba8_copy : bgrx8; + else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM) + fn = rgba8_copy; + break; + } - return true; + return fn; } diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h index 62ec8847fb..e9c43920a1 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h @@ -55,7 +55,20 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2, enum isl_tiling tiling, mem_copy_fn mem_copy); -bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, - GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp); +/* Tells intel_get_memcpy() whether the memcpy() is + * + * - an upload to the GPU with an aligned destination and a potentially + * unaligned source; or + * - a download from the GPU with an aligned source and a potentially + * unaligned destination. + */ +enum intel_memcpy_direction { + INTEL_UPLOAD, + INTEL_DOWNLOAD +}; + +mem_copy_fn intel_get_memcpy(mesa_format tiledFormat, + GLenum format, GLenum type, + enum intel_memcpy_direction direction); #endif /* INTEL_TILED_MEMCPY */ -- 2.15.0.rc1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev