[Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces

2015-08-10 Thread Ben Widawsky
This patch will use a new calculation to determine if a surface can be blitted
from or to. Previously, the total_height member was used. Total_height in the
case of 2d, 3d, and cube map arrays is the height of each slice/layer/face.
Since the GL map APIS only ever deal with a slice at a time however, the
determining factor is really the height of one slice.

This patch also has a side effect of not needing to set potentially large
texture objects to the CPU domain, which implies we do not need to clflush the
entire objects. (See references below for a kernel patch to achieve the same
thing)

With both the Y-tiled surfaces, and the removal of excessive clflushes, this
improves the terrain benchmark on Cherryview (data collected by Jordan)

Jordan was extremely helpful in creating this patch. Consider him co-author.

v2: Remove assertion which didn't belong. This assert was only meant for the
patch which renamed gtt mappings to really mean GTT mappings. This should fix
around 150 piglit failures
Some reworks to centralize blitability determination (Jason, Jordan)

v3: Rebased with some non-trivial conflicts on Anuj's blitter fixes.

v4: Another non-trivial rebase

References: http://patchwork.freedesktop.org/patch/38909/
Cc: Jordan Justen jordan.l.jus...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 src/mesa/drivers/dri/i965/brw_tex_layout.c| 20 ++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 29 +--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 28 ++
 3 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 88bfe35..e2fcea3 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -675,10 +675,22 @@ brw_miptree_choose_tiling(struct brw_context *brw,
   return I915_TILING_NONE;
 
if (ALIGN(minimum_pitch, 512) = INTEL_MAX_BLIT_PITCH ||
-   mt-total_width = INTEL_MAX_BLIT_PITCH ||
-   mt-total_height = INTEL_MAX_BLIT_ROWS) {
-  perf_debug(%dx%d miptree too large to blit, falling back to untiled,
- mt-total_width, mt-total_height);
+   intel_miptree_exceeds_blit_height(mt)) {
+  if (mt-format == GL_TEXTURE_3D) {
+ perf_debug(Unsupported large 3D texture blit. 
+Falling back to untiled.\n);
+  } else {
+ /* qpitch should always be greater than or equal to the tile aligned
+  * maximum of lod0 height.  That is sufficient to determine if we can
+  * blit, but the most optimal value is potentially less.
+  */
+ if (mt-physical_height0  INTEL_MAX_BLIT_ROWS) {
+perf_debug(Potentially skipped a blittable buffers %d\n,
+   mt-physical_height0);
+ }
+ perf_debug(%dx%d miptree too large to blit, falling back to untiled,
+mt-total_width, mt-total_height);
+  }
   return I915_TILING_NONE;
}
 
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index ba784de..894f5f1 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -677,12 +677,19 @@ intel_miptree_create(struct brw_context *brw,
 
mt-pitch = pitch;
 
+   uint32_t size = mt-bo-size;
+   if (mt-tiling) {
+  size = ALIGN(intel_miptree_blit_height(mt) * mt-pitch,
+   intel_blit_tile_height(mt-tiling));
+  assert(size = mt-bo-size);
+   }
+
/* If the BO is too large to fit in the aperture, we need to use the
 * BLT engine to support it.  Prior to Sandybridge, the BLT paths can't
 * handle Y-tiling, so we need to fall back to X.
 */
if (brw-gen  6 
-   mt-bo-size = brw-max_gtt_map_object_size 
+   size = brw-max_gtt_map_object_size 
mt-tiling == I915_TILING_Y) {
   perf_debug(%dx%d miptree larger than aperture; falling back to 
X-tiled\n,
  mt-total_width, mt-total_height);
@@ -2599,23 +2606,7 @@ intel_miptree_release_map(struct intel_mipmap_tree *mt,
*map = NULL;
 }
 
-static bool
-can_blit_slice(struct intel_mipmap_tree *mt,
-   unsigned int level, unsigned int slice)
-{
-   uint32_t image_x;
-   uint32_t image_y;
-   intel_miptree_get_image_offset(mt, level, slice, image_x, image_y);
-   if (image_x = INTEL_MAX_BLIT_PITCH || image_y = INTEL_MAX_BLIT_ROWS)
-  return false;
-
/* See intel_miptree_blit() for details on the 32k pitch limit. */
-   if (mt-pitch = INTEL_MAX_BLIT_PITCH)
-  return false;
-
-   return true;
-}
-
 static bool
 use_intel_mipree_map_blit(struct brw_context *brw,
   struct intel_mipmap_tree *mt,
@@ -2632,12 +2623,12 @@ use_intel_mipree_map_blit(struct brw_context *brw,
(mt-tiling == I915_TILING_X ||
 /* Prior to Sandybridge, the blitter can't handle Y tiling 

Re: [Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces

2015-03-25 Thread Ben Widawsky
On Mon, Mar 23, 2015 at 02:52:50PM +, Neil Roberts wrote:
 Sorry for the delay in replying to this.
 
 Ben Widawsky b...@bwidawsk.net writes:
 
   +static inline uint32_t
   +intel_miptree_blit_height(struct intel_mipmap_tree *mt)
   +{
   +   switch (mt-target) {
   +   case GL_TEXTURE_CUBE_MAP:
   +   case GL_TEXTURE_1D_ARRAY:
   +   case GL_TEXTURE_2D_ARRAY:
   +   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
   +   case GL_TEXTURE_CUBE_MAP_ARRAY:
   +  assert(mt-logical_depth0);
   +  return mt-qpitch;
   +   case GL_TEXTURE_3D:
   +  /* FIXME 3d textures don't have a qpitch. I think it's simply the 
   tiled
   +   * aligned mt-physical_height0. Since 3D textures aren't used 
   often, just
   +   * print the perf debug from the caller and bail
   +   */
   +  /* fallthrough */
   +   default:
   +  return mt-total_height;
   +   }
   +}
  
  This function might stop working on Skylake if we land my patch to fix
  the qpitch calculation. In that case the qpitch isn't necessarily a
  count of the number of rows. In 1D textures it is the number of pixels
  and for compressed textures it is the number of blocks. Maybe we could
  also store the physical_qpitch that is calculated in
  brw_miptree_layout_texture_array?
  
 
  I'm pretty sure today we never use the blitter for compressed
  textures. Therefore, I believe we can ignore that case. In the case
  where we use pixels, I believe it will still work fine as long as long
  as each layer is tightly packed (which I thought it was). If it's not,
  then I suppose we have a problem. I'm also totally fine with making 1D
  fallthrough since I don't think it's a particularly common case for it
  to surpass total_height anyway.
 
 I'm not sure what you are getting at here. Regardless of whether the 1D
 slices are tightly packed, we can't just return the qpitch value here
 for 1D textures because it has no relation to the height of the image.
 The height of the image is always 1. The images actually aren't tightly
 packed on Skylake because they need to be aligned to 64 pixels.

Sorry, you are correct I was thinking total_height, not qpitch. As for the SKL
restriction, you're also right, SKL support wasn't yet merged when I originally
authored the patches.

 
 Is there any reason why we can't just use mt-logical_height0 instead of
 trying to look at the qpitch? If everything using the blitter is
 operating on one slice at a time, why would it ever try to blit
 something that is taller than the height? It would be pointless to try
 to include the padding between slices in the blit, wouldn't it?

You're right about the last part. Given that I wanted this function to return
the height to be blitted, I can't return just logical_height0 since it's not
necessarily tiled aligned. The hypocrisy is noted in that I am already not
returning the actual amount to be blitted.

Rounding logical_height0 up to a tile achieves what I want [I think].
Coincidentally the next part you point out does take care of the problem where
your height might be blittable but the tile aligned height is not.

I'd like Jason and/or Jordan to weigh in since they were a large part of the
current design. It seems like if I do return the tiled aligned height here, I
can kill miptree_exceeds_blit_height() and do the simple height compare. I would
be in favor of that.

 
 Looking at the patch again in more detail I noticed something else that
 I missed the first time.
 
  diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
  b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  index 16bd151..ee8fae4 100644
  --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  @@ -86,7 +86,6 @@ compute_msaa_layout(struct brw_context *brw, mesa_format 
  format, GLenum target)
  }
   }
   
  -
   /**
* For single-sampled render targets (non-MSRT), the MCS buffer is a
* scaled-down bitfield representation of the color buffer which is 
  capable of
  @@ -437,6 +436,12 @@ intel_miptree_create_layout(struct brw_context *brw,
  return mt;
   }
   
  +static bool
  +miptree_exceeds_blit_height(struct intel_mipmap_tree *mt)
  +{
  +   return intel_miptree_blit_height(mt) = 
  intel_blit_tile_height(mt-tiling);
  +}
 
 Is that supposed to be = intel_blit_max_height instead? Otherwise it's
 going to disable tiling for any texture that is taller than a single
 tile, right?

See above. If I do keep it, it definitely needs a comment.

 
 Regards,
 - Neil

Thanks.

-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces

2015-03-23 Thread Neil Roberts
Sorry for the delay in replying to this.

Ben Widawsky b...@bwidawsk.net writes:

  +static inline uint32_t
  +intel_miptree_blit_height(struct intel_mipmap_tree *mt)
  +{
  +   switch (mt-target) {
  +   case GL_TEXTURE_CUBE_MAP:
  +   case GL_TEXTURE_1D_ARRAY:
  +   case GL_TEXTURE_2D_ARRAY:
  +   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  +   case GL_TEXTURE_CUBE_MAP_ARRAY:
  +  assert(mt-logical_depth0);
  +  return mt-qpitch;
  +   case GL_TEXTURE_3D:
  +  /* FIXME 3d textures don't have a qpitch. I think it's simply the 
  tiled
  +   * aligned mt-physical_height0. Since 3D textures aren't used 
  often, just
  +   * print the perf debug from the caller and bail
  +   */
  +  /* fallthrough */
  +   default:
  +  return mt-total_height;
  +   }
  +}
 
 This function might stop working on Skylake if we land my patch to fix
 the qpitch calculation. In that case the qpitch isn't necessarily a
 count of the number of rows. In 1D textures it is the number of pixels
 and for compressed textures it is the number of blocks. Maybe we could
 also store the physical_qpitch that is calculated in
 brw_miptree_layout_texture_array?
 

 I'm pretty sure today we never use the blitter for compressed
 textures. Therefore, I believe we can ignore that case. In the case
 where we use pixels, I believe it will still work fine as long as long
 as each layer is tightly packed (which I thought it was). If it's not,
 then I suppose we have a problem. I'm also totally fine with making 1D
 fallthrough since I don't think it's a particularly common case for it
 to surpass total_height anyway.

I'm not sure what you are getting at here. Regardless of whether the 1D
slices are tightly packed, we can't just return the qpitch value here
for 1D textures because it has no relation to the height of the image.
The height of the image is always 1. The images actually aren't tightly
packed on Skylake because they need to be aligned to 64 pixels.

Is there any reason why we can't just use mt-logical_height0 instead of
trying to look at the qpitch? If everything using the blitter is
operating on one slice at a time, why would it ever try to blit
something that is taller than the height? It would be pointless to try
to include the padding between slices in the blit, wouldn't it?

Looking at the patch again in more detail I noticed something else that
I missed the first time.

 diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
 b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 index 16bd151..ee8fae4 100644
 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 @@ -86,7 +86,6 @@ compute_msaa_layout(struct brw_context *brw, mesa_format 
 format, GLenum target)
 }
  }
  
 -
  /**
   * For single-sampled render targets (non-MSRT), the MCS buffer is a
   * scaled-down bitfield representation of the color buffer which is capable 
 of
 @@ -437,6 +436,12 @@ intel_miptree_create_layout(struct brw_context *brw,
 return mt;
  }
  
 +static bool
 +miptree_exceeds_blit_height(struct intel_mipmap_tree *mt)
 +{
 +   return intel_miptree_blit_height(mt) = 
 intel_blit_tile_height(mt-tiling);
 +}

Is that supposed to be = intel_blit_max_height instead? Otherwise it's
going to disable tiling for any texture that is taller than a single
tile, right?

Regards,
- Neil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces

2015-03-11 Thread Ben Widawsky
On Tue, Mar 10, 2015 at 05:39:24PM +, Neil Roberts wrote:
 Ben Widawsky benjamin.widaw...@intel.com writes:
 
  This patch will use a new calculation to determine if a surface can be 
  blitted
  from or to. Previously, the total_height member was used. Total_height in 
  the
  case of 2d, 3d, and cube map arrays is the height of each slice/layer/face.
  Since the GL map APIS only ever deal with a slice at a time however, the
  determining factor is really the height of one slice.
 
 Do you mean to say that total_height is the combined height of all of
 the slices/layers/faces? If so the wording is confusing.
 

Yep. Thanks, I agree, and fixed.

  +static inline uint32_t
  +intel_miptree_blit_height(struct intel_mipmap_tree *mt)
  +{
  +   switch (mt-target) {
  +   case GL_TEXTURE_CUBE_MAP:
  +   case GL_TEXTURE_1D_ARRAY:
  +   case GL_TEXTURE_2D_ARRAY:
  +   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  +   case GL_TEXTURE_CUBE_MAP_ARRAY:
  +  assert(mt-logical_depth0);
  +  return mt-qpitch;
  +   case GL_TEXTURE_3D:
  +  /* FIXME 3d textures don't have a qpitch. I think it's simply the 
  tiled
  +   * aligned mt-physical_height0. Since 3D textures aren't used 
  often, just
  +   * print the perf debug from the caller and bail
  +   */
  +  /* fallthrough */
  +   default:
  +  return mt-total_height;
  +   }
  +}
 
 This function might stop working on Skylake if we land my patch to fix
 the qpitch calculation. In that case the qpitch isn't necessarily a
 count of the number of rows. In 1D textures it is the number of pixels
 and for compressed textures it is the number of blocks. Maybe we could
 also store the physical_qpitch that is calculated in
 brw_miptree_layout_texture_array?
 

I'm pretty sure today we never use the blitter for compressed textures.
Therefore, I believe we can ignore that case. In the case where we use pixels, I
believe it will still work fine as long as long as each layer is tightly packed
(which I thought it was). If it's not, then I suppose we have a problem. I'm
also totally fine with making 1D fallthrough since I don't think it's a
particularly common case for it to surpass total_height anyway.

Any preference on which to land first (and therefore who has to fix it)? My
patches are older, but yours potentially fixes bugs and mine is just perf.

 Regards,
 - Neil



-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces

2015-03-10 Thread Neil Roberts
Ben Widawsky benjamin.widaw...@intel.com writes:

 This patch will use a new calculation to determine if a surface can be blitted
 from or to. Previously, the total_height member was used. Total_height in 
 the
 case of 2d, 3d, and cube map arrays is the height of each slice/layer/face.
 Since the GL map APIS only ever deal with a slice at a time however, the
 determining factor is really the height of one slice.

Do you mean to say that total_height is the combined height of all of
the slices/layers/faces? If so the wording is confusing.

 +static inline uint32_t
 +intel_miptree_blit_height(struct intel_mipmap_tree *mt)
 +{
 +   switch (mt-target) {
 +   case GL_TEXTURE_CUBE_MAP:
 +   case GL_TEXTURE_1D_ARRAY:
 +   case GL_TEXTURE_2D_ARRAY:
 +   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
 +   case GL_TEXTURE_CUBE_MAP_ARRAY:
 +  assert(mt-logical_depth0);
 +  return mt-qpitch;
 +   case GL_TEXTURE_3D:
 +  /* FIXME 3d textures don't have a qpitch. I think it's simply the tiled
 +   * aligned mt-physical_height0. Since 3D textures aren't used often, 
 just
 +   * print the perf debug from the caller and bail
 +   */
 +  /* fallthrough */
 +   default:
 +  return mt-total_height;
 +   }
 +}

This function might stop working on Skylake if we land my patch to fix
the qpitch calculation. In that case the qpitch isn't necessarily a
count of the number of rows. In 1D textures it is the number of pixels
and for compressed textures it is the number of blocks. Maybe we could
also store the physical_qpitch that is calculated in
brw_miptree_layout_texture_array?

Regards,
- Neil


pgplZZBaIPilJ.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces

2015-03-09 Thread Ben Widawsky
This patch will use a new calculation to determine if a surface can be blitted
from or to. Previously, the total_height member was used. Total_height in the
case of 2d, 3d, and cube map arrays is the height of each slice/layer/face.
Since the GL map APIS only ever deal with a slice at a time however, the
determining factor is really the height of one slice.

This patch also has a side effect of not needing to set potentially large
texture objects to the CPU domain, which implies we do not need to clflush the
entire objects. (See references below for a kernel patch to achieve the same
thing)

With both the Y-tiled surfaces, and the removal of excessive clflushes, this
improves the terrain benchmark on Cherryview (data collected by Jordan)

Jordan was extremely helpful in creating this patch. Consider him co-author.

v2: Remove assertion which didn't belong. This assert was only meant for the
patch which renamed gtt mappings to really mean GTT mappings. This should fix
around 150 piglit failures
Some reworks to centralize blitability determination (Jason, Jordan)

v3: Rebased with some non-trivial conflicts on Anuj's blitter fixes.

References: http://patchwork.freedesktop.org/patch/38909/
Cc: Jordan Justen jordan.l.jus...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 55 +++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 21 ++
 2 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 16bd151..ee8fae4 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -86,7 +86,6 @@ compute_msaa_layout(struct brw_context *brw, mesa_format 
format, GLenum target)
}
 }
 
-
 /**
  * For single-sampled render targets (non-MSRT), the MCS buffer is a
  * scaled-down bitfield representation of the color buffer which is capable of
@@ -437,6 +436,12 @@ intel_miptree_create_layout(struct brw_context *brw,
return mt;
 }
 
+static bool
+miptree_exceeds_blit_height(struct intel_mipmap_tree *mt)
+{
+   return intel_miptree_blit_height(mt) = intel_blit_tile_height(mt-tiling);
+}
+
 /**
  * \brief Helper function for intel_miptree_create().
  */
@@ -502,10 +507,22 @@ intel_miptree_choose_tiling(struct brw_context *brw,
   return I915_TILING_NONE;
 
if (ALIGN(minimum_pitch, 512) = INTEL_MAX_BLIT_PITCH ||
-   mt-total_width = INTEL_MAX_BLIT_PITCH ||
-   mt-total_height = INTEL_MAX_BLIT_ROWS) {
-  perf_debug(%dx%d miptree too large to blit, falling back to untiled,
- mt-total_width, mt-total_height);
+   miptree_exceeds_blit_height(mt)) {
+  if (mt-format == GL_TEXTURE_3D) {
+ perf_debug(Unsupported large 3D texture blit. 
+Falling back to untiled.\n);
+  } else {
+ /* qpitch should always be greater than or equal to the tile aligned
+  * maximum of lod0 height.  That is sufficient to determine if we can
+  * blit, but the most optimal value is potentially less.
+  */
+ if (mt-physical_height0  INTEL_MAX_BLIT_ROWS) {
+perf_debug(Potentially skipped a blittable buffers %d\n,
+  mt-physical_height0);
+ }
+ perf_debug(%dx%d miptree too large to blit, falling back to untiled,
+mt-total_width, mt-total_height);
+  }
   return I915_TILING_NONE;
}
 
@@ -647,12 +664,18 @@ intel_miptree_create(struct brw_context *brw,
   BO_ALLOC_FOR_RENDER : 0));
mt-pitch = pitch;
 
+   uint32_t size =
+  mt-tiling ? ALIGN(intel_miptree_blit_height(mt) * mt-pitch,
+ intel_blit_tile_height(mt-tiling)) :
+  mt-bo-size;
+   assert(size = mt-bo-size);
+
/* If the BO is too large to fit in the aperture, we need to use the
 * BLT engine to support it.  Prior to Sandybridge, the BLT paths can't
 * handle Y-tiling, so we need to fall back to X.
 */
if (brw-gen  6 
-   mt-bo-size = brw-max_gtt_map_object_size 
+   size = brw-max_gtt_map_object_size 
mt-tiling == I915_TILING_Y 
requested_tiling == INTEL_MIPTREE_TILING_ANY) {
   perf_debug(%dx%d miptree larger than aperture; falling back to 
X-tiled\n,
@@ -2290,23 +2313,7 @@ intel_miptree_release_map(struct intel_mipmap_tree *mt,
*map = NULL;
 }
 
-static bool
-can_blit_slice(struct intel_mipmap_tree *mt,
-   unsigned int level, unsigned int slice)
-{
-   uint32_t image_x;
-   uint32_t image_y;
-   intel_miptree_get_image_offset(mt, level, slice, image_x, image_y);
-   if (image_x = INTEL_MAX_BLIT_PITCH || image_y = INTEL_MAX_BLIT_ROWS)
-  return false;
-
/* See intel_miptree_blit() for details on the 32k pitch limit. */
-   if (mt-pitch = INTEL_MAX_BLIT_PITCH)
-  return false;
-
-   return true;
-}
-
 static bool