[Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces
This patch will use a new calculation to determine if a surface can be blitted from or to. Previously, the total_height member was used. Total_height in the case of 2d, 3d, and cube map arrays is the height of each slice/layer/face. Since the GL map APIS only ever deal with a slice at a time however, the determining factor is really the height of one slice. This patch also has a side effect of not needing to set potentially large texture objects to the CPU domain, which implies we do not need to clflush the entire objects. (See references below for a kernel patch to achieve the same thing) With both the Y-tiled surfaces, and the removal of excessive clflushes, this improves the terrain benchmark on Cherryview (data collected by Jordan) Jordan was extremely helpful in creating this patch. Consider him co-author. v2: Remove assertion which didn't belong. This assert was only meant for the patch which renamed gtt mappings to really mean GTT mappings. This should fix around 150 piglit failures Some reworks to centralize blitability determination (Jason, Jordan) v3: Rebased with some non-trivial conflicts on Anuj's blitter fixes. v4: Another non-trivial rebase References: http://patchwork.freedesktop.org/patch/38909/ Cc: Jordan Justen jordan.l.jus...@intel.com Signed-off-by: Ben Widawsky b...@bwidawsk.net --- src/mesa/drivers/dri/i965/brw_tex_layout.c| 20 ++ src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 29 +-- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 28 ++ 3 files changed, 54 insertions(+), 23 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 88bfe35..e2fcea3 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -675,10 +675,22 @@ brw_miptree_choose_tiling(struct brw_context *brw, return I915_TILING_NONE; if (ALIGN(minimum_pitch, 512) = INTEL_MAX_BLIT_PITCH || - mt-total_width = INTEL_MAX_BLIT_PITCH || - mt-total_height = INTEL_MAX_BLIT_ROWS) { - perf_debug(%dx%d miptree too large to blit, falling back to untiled, - mt-total_width, mt-total_height); + intel_miptree_exceeds_blit_height(mt)) { + if (mt-format == GL_TEXTURE_3D) { + perf_debug(Unsupported large 3D texture blit. +Falling back to untiled.\n); + } else { + /* qpitch should always be greater than or equal to the tile aligned + * maximum of lod0 height. That is sufficient to determine if we can + * blit, but the most optimal value is potentially less. + */ + if (mt-physical_height0 INTEL_MAX_BLIT_ROWS) { +perf_debug(Potentially skipped a blittable buffers %d\n, + mt-physical_height0); + } + perf_debug(%dx%d miptree too large to blit, falling back to untiled, +mt-total_width, mt-total_height); + } return I915_TILING_NONE; } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index ba784de..894f5f1 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -677,12 +677,19 @@ intel_miptree_create(struct brw_context *brw, mt-pitch = pitch; + uint32_t size = mt-bo-size; + if (mt-tiling) { + size = ALIGN(intel_miptree_blit_height(mt) * mt-pitch, + intel_blit_tile_height(mt-tiling)); + assert(size = mt-bo-size); + } + /* If the BO is too large to fit in the aperture, we need to use the * BLT engine to support it. Prior to Sandybridge, the BLT paths can't * handle Y-tiling, so we need to fall back to X. */ if (brw-gen 6 - mt-bo-size = brw-max_gtt_map_object_size + size = brw-max_gtt_map_object_size mt-tiling == I915_TILING_Y) { perf_debug(%dx%d miptree larger than aperture; falling back to X-tiled\n, mt-total_width, mt-total_height); @@ -2599,23 +2606,7 @@ intel_miptree_release_map(struct intel_mipmap_tree *mt, *map = NULL; } -static bool -can_blit_slice(struct intel_mipmap_tree *mt, - unsigned int level, unsigned int slice) -{ - uint32_t image_x; - uint32_t image_y; - intel_miptree_get_image_offset(mt, level, slice, image_x, image_y); - if (image_x = INTEL_MAX_BLIT_PITCH || image_y = INTEL_MAX_BLIT_ROWS) - return false; - /* See intel_miptree_blit() for details on the 32k pitch limit. */ - if (mt-pitch = INTEL_MAX_BLIT_PITCH) - return false; - - return true; -} - static bool use_intel_mipree_map_blit(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -2632,12 +2623,12 @@ use_intel_mipree_map_blit(struct brw_context *brw, (mt-tiling == I915_TILING_X || /* Prior to Sandybridge, the blitter can't handle Y tiling
Re: [Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces
On Mon, Mar 23, 2015 at 02:52:50PM +, Neil Roberts wrote: Sorry for the delay in replying to this. Ben Widawsky b...@bwidawsk.net writes: +static inline uint32_t +intel_miptree_blit_height(struct intel_mipmap_tree *mt) +{ + switch (mt-target) { + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + case GL_TEXTURE_CUBE_MAP_ARRAY: + assert(mt-logical_depth0); + return mt-qpitch; + case GL_TEXTURE_3D: + /* FIXME 3d textures don't have a qpitch. I think it's simply the tiled + * aligned mt-physical_height0. Since 3D textures aren't used often, just + * print the perf debug from the caller and bail + */ + /* fallthrough */ + default: + return mt-total_height; + } +} This function might stop working on Skylake if we land my patch to fix the qpitch calculation. In that case the qpitch isn't necessarily a count of the number of rows. In 1D textures it is the number of pixels and for compressed textures it is the number of blocks. Maybe we could also store the physical_qpitch that is calculated in brw_miptree_layout_texture_array? I'm pretty sure today we never use the blitter for compressed textures. Therefore, I believe we can ignore that case. In the case where we use pixels, I believe it will still work fine as long as long as each layer is tightly packed (which I thought it was). If it's not, then I suppose we have a problem. I'm also totally fine with making 1D fallthrough since I don't think it's a particularly common case for it to surpass total_height anyway. I'm not sure what you are getting at here. Regardless of whether the 1D slices are tightly packed, we can't just return the qpitch value here for 1D textures because it has no relation to the height of the image. The height of the image is always 1. The images actually aren't tightly packed on Skylake because they need to be aligned to 64 pixels. Sorry, you are correct I was thinking total_height, not qpitch. As for the SKL restriction, you're also right, SKL support wasn't yet merged when I originally authored the patches. Is there any reason why we can't just use mt-logical_height0 instead of trying to look at the qpitch? If everything using the blitter is operating on one slice at a time, why would it ever try to blit something that is taller than the height? It would be pointless to try to include the padding between slices in the blit, wouldn't it? You're right about the last part. Given that I wanted this function to return the height to be blitted, I can't return just logical_height0 since it's not necessarily tiled aligned. The hypocrisy is noted in that I am already not returning the actual amount to be blitted. Rounding logical_height0 up to a tile achieves what I want [I think]. Coincidentally the next part you point out does take care of the problem where your height might be blittable but the tile aligned height is not. I'd like Jason and/or Jordan to weigh in since they were a large part of the current design. It seems like if I do return the tiled aligned height here, I can kill miptree_exceeds_blit_height() and do the simple height compare. I would be in favor of that. Looking at the patch again in more detail I noticed something else that I missed the first time. diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 16bd151..ee8fae4 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -86,7 +86,6 @@ compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target) } } - /** * For single-sampled render targets (non-MSRT), the MCS buffer is a * scaled-down bitfield representation of the color buffer which is capable of @@ -437,6 +436,12 @@ intel_miptree_create_layout(struct brw_context *brw, return mt; } +static bool +miptree_exceeds_blit_height(struct intel_mipmap_tree *mt) +{ + return intel_miptree_blit_height(mt) = intel_blit_tile_height(mt-tiling); +} Is that supposed to be = intel_blit_max_height instead? Otherwise it's going to disable tiling for any texture that is taller than a single tile, right? See above. If I do keep it, it definitely needs a comment. Regards, - Neil Thanks. -- Ben Widawsky, Intel Open Source Technology Center ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces
Sorry for the delay in replying to this. Ben Widawsky b...@bwidawsk.net writes: +static inline uint32_t +intel_miptree_blit_height(struct intel_mipmap_tree *mt) +{ + switch (mt-target) { + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + case GL_TEXTURE_CUBE_MAP_ARRAY: + assert(mt-logical_depth0); + return mt-qpitch; + case GL_TEXTURE_3D: + /* FIXME 3d textures don't have a qpitch. I think it's simply the tiled + * aligned mt-physical_height0. Since 3D textures aren't used often, just + * print the perf debug from the caller and bail + */ + /* fallthrough */ + default: + return mt-total_height; + } +} This function might stop working on Skylake if we land my patch to fix the qpitch calculation. In that case the qpitch isn't necessarily a count of the number of rows. In 1D textures it is the number of pixels and for compressed textures it is the number of blocks. Maybe we could also store the physical_qpitch that is calculated in brw_miptree_layout_texture_array? I'm pretty sure today we never use the blitter for compressed textures. Therefore, I believe we can ignore that case. In the case where we use pixels, I believe it will still work fine as long as long as each layer is tightly packed (which I thought it was). If it's not, then I suppose we have a problem. I'm also totally fine with making 1D fallthrough since I don't think it's a particularly common case for it to surpass total_height anyway. I'm not sure what you are getting at here. Regardless of whether the 1D slices are tightly packed, we can't just return the qpitch value here for 1D textures because it has no relation to the height of the image. The height of the image is always 1. The images actually aren't tightly packed on Skylake because they need to be aligned to 64 pixels. Is there any reason why we can't just use mt-logical_height0 instead of trying to look at the qpitch? If everything using the blitter is operating on one slice at a time, why would it ever try to blit something that is taller than the height? It would be pointless to try to include the padding between slices in the blit, wouldn't it? Looking at the patch again in more detail I noticed something else that I missed the first time. diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 16bd151..ee8fae4 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -86,7 +86,6 @@ compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target) } } - /** * For single-sampled render targets (non-MSRT), the MCS buffer is a * scaled-down bitfield representation of the color buffer which is capable of @@ -437,6 +436,12 @@ intel_miptree_create_layout(struct brw_context *brw, return mt; } +static bool +miptree_exceeds_blit_height(struct intel_mipmap_tree *mt) +{ + return intel_miptree_blit_height(mt) = intel_blit_tile_height(mt-tiling); +} Is that supposed to be = intel_blit_max_height instead? Otherwise it's going to disable tiling for any texture that is taller than a single tile, right? Regards, - Neil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces
On Tue, Mar 10, 2015 at 05:39:24PM +, Neil Roberts wrote: Ben Widawsky benjamin.widaw...@intel.com writes: This patch will use a new calculation to determine if a surface can be blitted from or to. Previously, the total_height member was used. Total_height in the case of 2d, 3d, and cube map arrays is the height of each slice/layer/face. Since the GL map APIS only ever deal with a slice at a time however, the determining factor is really the height of one slice. Do you mean to say that total_height is the combined height of all of the slices/layers/faces? If so the wording is confusing. Yep. Thanks, I agree, and fixed. +static inline uint32_t +intel_miptree_blit_height(struct intel_mipmap_tree *mt) +{ + switch (mt-target) { + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + case GL_TEXTURE_CUBE_MAP_ARRAY: + assert(mt-logical_depth0); + return mt-qpitch; + case GL_TEXTURE_3D: + /* FIXME 3d textures don't have a qpitch. I think it's simply the tiled + * aligned mt-physical_height0. Since 3D textures aren't used often, just + * print the perf debug from the caller and bail + */ + /* fallthrough */ + default: + return mt-total_height; + } +} This function might stop working on Skylake if we land my patch to fix the qpitch calculation. In that case the qpitch isn't necessarily a count of the number of rows. In 1D textures it is the number of pixels and for compressed textures it is the number of blocks. Maybe we could also store the physical_qpitch that is calculated in brw_miptree_layout_texture_array? I'm pretty sure today we never use the blitter for compressed textures. Therefore, I believe we can ignore that case. In the case where we use pixels, I believe it will still work fine as long as long as each layer is tightly packed (which I thought it was). If it's not, then I suppose we have a problem. I'm also totally fine with making 1D fallthrough since I don't think it's a particularly common case for it to surpass total_height anyway. Any preference on which to land first (and therefore who has to fix it)? My patches are older, but yours potentially fixes bugs and mine is just perf. Regards, - Neil -- Ben Widawsky, Intel Open Source Technology Center ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces
Ben Widawsky benjamin.widaw...@intel.com writes: This patch will use a new calculation to determine if a surface can be blitted from or to. Previously, the total_height member was used. Total_height in the case of 2d, 3d, and cube map arrays is the height of each slice/layer/face. Since the GL map APIS only ever deal with a slice at a time however, the determining factor is really the height of one slice. Do you mean to say that total_height is the combined height of all of the slices/layers/faces? If so the wording is confusing. +static inline uint32_t +intel_miptree_blit_height(struct intel_mipmap_tree *mt) +{ + switch (mt-target) { + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + case GL_TEXTURE_CUBE_MAP_ARRAY: + assert(mt-logical_depth0); + return mt-qpitch; + case GL_TEXTURE_3D: + /* FIXME 3d textures don't have a qpitch. I think it's simply the tiled + * aligned mt-physical_height0. Since 3D textures aren't used often, just + * print the perf debug from the caller and bail + */ + /* fallthrough */ + default: + return mt-total_height; + } +} This function might stop working on Skylake if we land my patch to fix the qpitch calculation. In that case the qpitch isn't necessarily a count of the number of rows. In 1D textures it is the number of pixels and for compressed textures it is the number of blocks. Maybe we could also store the physical_qpitch that is calculated in brw_miptree_layout_texture_array? Regards, - Neil pgplZZBaIPilJ.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces
This patch will use a new calculation to determine if a surface can be blitted from or to. Previously, the total_height member was used. Total_height in the case of 2d, 3d, and cube map arrays is the height of each slice/layer/face. Since the GL map APIS only ever deal with a slice at a time however, the determining factor is really the height of one slice. This patch also has a side effect of not needing to set potentially large texture objects to the CPU domain, which implies we do not need to clflush the entire objects. (See references below for a kernel patch to achieve the same thing) With both the Y-tiled surfaces, and the removal of excessive clflushes, this improves the terrain benchmark on Cherryview (data collected by Jordan) Jordan was extremely helpful in creating this patch. Consider him co-author. v2: Remove assertion which didn't belong. This assert was only meant for the patch which renamed gtt mappings to really mean GTT mappings. This should fix around 150 piglit failures Some reworks to centralize blitability determination (Jason, Jordan) v3: Rebased with some non-trivial conflicts on Anuj's blitter fixes. References: http://patchwork.freedesktop.org/patch/38909/ Cc: Jordan Justen jordan.l.jus...@intel.com Signed-off-by: Ben Widawsky b...@bwidawsk.net --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 55 +++ src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 21 ++ 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 16bd151..ee8fae4 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -86,7 +86,6 @@ compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target) } } - /** * For single-sampled render targets (non-MSRT), the MCS buffer is a * scaled-down bitfield representation of the color buffer which is capable of @@ -437,6 +436,12 @@ intel_miptree_create_layout(struct brw_context *brw, return mt; } +static bool +miptree_exceeds_blit_height(struct intel_mipmap_tree *mt) +{ + return intel_miptree_blit_height(mt) = intel_blit_tile_height(mt-tiling); +} + /** * \brief Helper function for intel_miptree_create(). */ @@ -502,10 +507,22 @@ intel_miptree_choose_tiling(struct brw_context *brw, return I915_TILING_NONE; if (ALIGN(minimum_pitch, 512) = INTEL_MAX_BLIT_PITCH || - mt-total_width = INTEL_MAX_BLIT_PITCH || - mt-total_height = INTEL_MAX_BLIT_ROWS) { - perf_debug(%dx%d miptree too large to blit, falling back to untiled, - mt-total_width, mt-total_height); + miptree_exceeds_blit_height(mt)) { + if (mt-format == GL_TEXTURE_3D) { + perf_debug(Unsupported large 3D texture blit. +Falling back to untiled.\n); + } else { + /* qpitch should always be greater than or equal to the tile aligned + * maximum of lod0 height. That is sufficient to determine if we can + * blit, but the most optimal value is potentially less. + */ + if (mt-physical_height0 INTEL_MAX_BLIT_ROWS) { +perf_debug(Potentially skipped a blittable buffers %d\n, + mt-physical_height0); + } + perf_debug(%dx%d miptree too large to blit, falling back to untiled, +mt-total_width, mt-total_height); + } return I915_TILING_NONE; } @@ -647,12 +664,18 @@ intel_miptree_create(struct brw_context *brw, BO_ALLOC_FOR_RENDER : 0)); mt-pitch = pitch; + uint32_t size = + mt-tiling ? ALIGN(intel_miptree_blit_height(mt) * mt-pitch, + intel_blit_tile_height(mt-tiling)) : + mt-bo-size; + assert(size = mt-bo-size); + /* If the BO is too large to fit in the aperture, we need to use the * BLT engine to support it. Prior to Sandybridge, the BLT paths can't * handle Y-tiling, so we need to fall back to X. */ if (brw-gen 6 - mt-bo-size = brw-max_gtt_map_object_size + size = brw-max_gtt_map_object_size mt-tiling == I915_TILING_Y requested_tiling == INTEL_MIPTREE_TILING_ANY) { perf_debug(%dx%d miptree larger than aperture; falling back to X-tiled\n, @@ -2290,23 +2313,7 @@ intel_miptree_release_map(struct intel_mipmap_tree *mt, *map = NULL; } -static bool -can_blit_slice(struct intel_mipmap_tree *mt, - unsigned int level, unsigned int slice) -{ - uint32_t image_x; - uint32_t image_y; - intel_miptree_get_image_offset(mt, level, slice, image_x, image_y); - if (image_x = INTEL_MAX_BLIT_PITCH || image_y = INTEL_MAX_BLIT_ROWS) - return false; - /* See intel_miptree_blit() for details on the 32k pitch limit. */ - if (mt-pitch = INTEL_MAX_BLIT_PITCH) - return false; - - return true; -} - static bool