Re: [Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping

2018-03-20 Thread Nanley Chery
On Wed, Mar 14, 2018 at 05:18:58PM +, Chris Wilson wrote:
> Quoting Nanley Chery (2018-03-14 17:14:15)
> > On Mon, Mar 12, 2018 at 10:52:55AM -0700, Scott D Phillips wrote:
> > > Rename the (un)map_gtt functions to (un)map_map (map by
> > > returning a map) and add new functions (un)map_tiled_memcpy that
> > > return a shadow buffer populated with the intel_tiled_memcpy
> > > functions.
> > >
> > > Tiling/detiling with the cpu will be the only way to handle Yf/Ys
> > > tiling, when support is added for those formats.
> > >
> > > v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson)
> > >
> > > v3: Add units to parameter names of tile_extents (Nanley Chery)
> > > Use _mesa_align_malloc for the shadow copy (Nanley)
> > > Continue using gtt maps on gen4 (Nanley)
> > > ---
> > >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 
> > > ---
> > >  1 file changed, 86 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > index c6213b21629..fba17bf5b7b 100644
> > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > @@ -31,6 +31,7 @@
> > >  #include "intel_image.h"
> > >  #include "intel_mipmap_tree.h"
> > >  #include "intel_tex.h"
> > > +#include "intel_tiled_memcpy.h"
> > >  #include "intel_blit.h"
> > >  #include "intel_fbo.h"
> > >  
> > > @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree 
> > > *mt)
> > >  }
> > >  
> > >  static void
> > > -intel_miptree_map_gtt(struct brw_context *brw,
> > > -  struct intel_mipmap_tree *mt,
> > > -  struct intel_miptree_map *map,
> > > -  unsigned int level, unsigned int slice)
> > > +intel_miptree_map_map(struct brw_context *brw,
> > > +  struct intel_mipmap_tree *mt,
> > > +  struct intel_miptree_map *map,
> > > +  unsigned int level, unsigned int slice)
> > >  {
> > > unsigned int bw, bh;
> > > void *base;
> > > @@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw,
> > >  }
> > >  
> > >  static void
> > > -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
> > > +intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
> > >  {
> > > intel_miptree_unmap_raw(mt);
> > >  }
> > >  
> > > +/* Compute extent parameters for use with tiled_memcpy functions.
> > > + * xs are in units of bytes and ys are in units of strides. */
> > > +static inline void
> > > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
> > > + unsigned int level, unsigned int slice, unsigned int *x1_B,
> > > + unsigned int *x2_B, unsigned int *y1_el, unsigned int 
> > > *y2_el)
> > > +{
> > > +   unsigned int block_width, block_height;
> > > +   unsigned int x0_el, y0_el;
> > > +
> > > +   _mesa_get_format_block_size(mt->format, &block_width, &block_height);
> > > +
> > > +   assert(map->x % block_width == 0);
> > > +   assert(map->y % block_height == 0);
> > > +
> > > +   intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
> > > +   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
> > > +   *y1_el = map->y / block_height + y0_el;
> > > +   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * 
> > > mt->cpp;
> > > +   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
> > > +}
> > > +
> > > +static void
> > > +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
> > > +   struct intel_mipmap_tree *mt,
> > > +   struct intel_miptree_map *map,
> > > +   unsigned int level, unsigned int slice)
> > > +{
> > > +   unsigned int x1, x2, y1, y2;
> > > +   tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
> > > +   map->stride = _mesa_format_row_stride(mt->format, map->w);
> > > +   map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), 
> > > 16);
> > > +
> > > +   assert(map->ptr);
> > > +
> > > +   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
> > 
> > It looks like we'll generate extra copies using this function, but only
> > in a few corner cases. I think the following places should be using the
> > INVALIDATE flag, but aren't:
> > * _mesa_store_cleartexsubimage
> > * generate_mipmap_uncompressed
> > 
> > > +  char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> > > +  src += mt->offset;
> > > +
> > 
> > It seems possible that the buffer object had a WC memory type during
> > rendering. In that case, we need an sfence here right?
> > 
> > This stuff is pretty new to me, so perhaps others would like to chime
> > in.
> > 

From talking to Ben on IRC and through my reading of the following
section in the HW docs: Memory Types and Applicability to GFX, it seems
that WC from GFX perspecitive is basically UC and doesn't use WC buffers:

   Write Combining 

Re: [Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping

2018-03-14 Thread Chris Wilson
Quoting Nanley Chery (2018-03-14 17:14:15)
> On Mon, Mar 12, 2018 at 10:52:55AM -0700, Scott D Phillips wrote:
> > Rename the (un)map_gtt functions to (un)map_map (map by
> > returning a map) and add new functions (un)map_tiled_memcpy that
> > return a shadow buffer populated with the intel_tiled_memcpy
> > functions.
> >
> > Tiling/detiling with the cpu will be the only way to handle Yf/Ys
> > tiling, when support is added for those formats.
> >
> > v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson)
> >
> > v3: Add units to parameter names of tile_extents (Nanley Chery)
> > Use _mesa_align_malloc for the shadow copy (Nanley)
> > Continue using gtt maps on gen4 (Nanley)
> > ---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 
> > ---
> >  1 file changed, 86 insertions(+), 8 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index c6213b21629..fba17bf5b7b 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -31,6 +31,7 @@
> >  #include "intel_image.h"
> >  #include "intel_mipmap_tree.h"
> >  #include "intel_tex.h"
> > +#include "intel_tiled_memcpy.h"
> >  #include "intel_blit.h"
> >  #include "intel_fbo.h"
> >  
> > @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree 
> > *mt)
> >  }
> >  
> >  static void
> > -intel_miptree_map_gtt(struct brw_context *brw,
> > -  struct intel_mipmap_tree *mt,
> > -  struct intel_miptree_map *map,
> > -  unsigned int level, unsigned int slice)
> > +intel_miptree_map_map(struct brw_context *brw,
> > +  struct intel_mipmap_tree *mt,
> > +  struct intel_miptree_map *map,
> > +  unsigned int level, unsigned int slice)
> >  {
> > unsigned int bw, bh;
> > void *base;
> > @@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw,
> >  }
> >  
> >  static void
> > -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
> > +intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
> >  {
> > intel_miptree_unmap_raw(mt);
> >  }
> >  
> > +/* Compute extent parameters for use with tiled_memcpy functions.
> > + * xs are in units of bytes and ys are in units of strides. */
> > +static inline void
> > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
> > + unsigned int level, unsigned int slice, unsigned int *x1_B,
> > + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
> > +{
> > +   unsigned int block_width, block_height;
> > +   unsigned int x0_el, y0_el;
> > +
> > +   _mesa_get_format_block_size(mt->format, &block_width, &block_height);
> > +
> > +   assert(map->x % block_width == 0);
> > +   assert(map->y % block_height == 0);
> > +
> > +   intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
> > +   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
> > +   *y1_el = map->y / block_height + y0_el;
> > +   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
> > +   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
> > +}
> > +
> > +static void
> > +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
> > +   struct intel_mipmap_tree *mt,
> > +   struct intel_miptree_map *map,
> > +   unsigned int level, unsigned int slice)
> > +{
> > +   unsigned int x1, x2, y1, y2;
> > +   tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
> > +   map->stride = _mesa_format_row_stride(mt->format, map->w);
> > +   map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), 
> > 16);
> > +
> > +   assert(map->ptr);
> > +
> > +   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
> 
> It looks like we'll generate extra copies using this function, but only
> in a few corner cases. I think the following places should be using the
> INVALIDATE flag, but aren't:
> * _mesa_store_cleartexsubimage
> * generate_mipmap_uncompressed
> 
> > +  char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> > +  src += mt->offset;
> > +
> 
> It seems possible that the buffer object had a WC memory type during
> rendering. In that case, we need an sfence here right?
> 
> This stuff is pretty new to me, so perhaps others would like to chime
> in.
> 
> > +  tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
> > +  mt->surf.row_pitch, brw->has_swizzling, 
> > mt->surf.tiling,
> > +  memcpy);
> > +
> > +  intel_miptree_unmap_raw(mt);
> > +   }
> > +}
> > +
> > +static void
> > +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
> > + struct intel_mipmap_tree *mt,
> > + struct intel_miptree_map *map,
> > + unsigned int lev

Re: [Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping

2018-03-14 Thread Nanley Chery
On Mon, Mar 12, 2018 at 10:52:55AM -0700, Scott D Phillips wrote:
> Rename the (un)map_gtt functions to (un)map_map (map by
> returning a map) and add new functions (un)map_tiled_memcpy that
> return a shadow buffer populated with the intel_tiled_memcpy
> functions.
>
> Tiling/detiling with the cpu will be the only way to handle Yf/Ys
> tiling, when support is added for those formats.
>
> v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson)
>
> v3: Add units to parameter names of tile_extents (Nanley Chery)
> Use _mesa_align_malloc for the shadow copy (Nanley)
> Continue using gtt maps on gen4 (Nanley)
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 
> ---
>  1 file changed, 86 insertions(+), 8 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index c6213b21629..fba17bf5b7b 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -31,6 +31,7 @@
>  #include "intel_image.h"
>  #include "intel_mipmap_tree.h"
>  #include "intel_tex.h"
> +#include "intel_tiled_memcpy.h"
>  #include "intel_blit.h"
>  #include "intel_fbo.h"
>  
> @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
>  }
>  
>  static void
> -intel_miptree_map_gtt(struct brw_context *brw,
> -  struct intel_mipmap_tree *mt,
> -  struct intel_miptree_map *map,
> -  unsigned int level, unsigned int slice)
> +intel_miptree_map_map(struct brw_context *brw,
> +  struct intel_mipmap_tree *mt,
> +  struct intel_miptree_map *map,
> +  unsigned int level, unsigned int slice)
>  {
> unsigned int bw, bh;
> void *base;
> @@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw,
>  }
>  
>  static void
> -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
> +intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
>  {
> intel_miptree_unmap_raw(mt);
>  }
>  
> +/* Compute extent parameters for use with tiled_memcpy functions.
> + * xs are in units of bytes and ys are in units of strides. */
> +static inline void
> +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
> + unsigned int level, unsigned int slice, unsigned int *x1_B,
> + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
> +{
> +   unsigned int block_width, block_height;
> +   unsigned int x0_el, y0_el;
> +
> +   _mesa_get_format_block_size(mt->format, &block_width, &block_height);
> +
> +   assert(map->x % block_width == 0);
> +   assert(map->y % block_height == 0);
> +
> +   intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
> +   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
> +   *y1_el = map->y / block_height + y0_el;
> +   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
> +   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
> +}
> +
> +static void
> +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
> +   struct intel_mipmap_tree *mt,
> +   struct intel_miptree_map *map,
> +   unsigned int level, unsigned int slice)
> +{
> +   unsigned int x1, x2, y1, y2;
> +   tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
> +   map->stride = _mesa_format_row_stride(mt->format, map->w);
> +   map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), 16);
> +
> +   assert(map->ptr);
> +
> +   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {

It looks like we'll generate extra copies using this function, but only
in a few corner cases. I think the following places should be using the
INVALIDATE flag, but aren't:
* _mesa_store_cleartexsubimage
* generate_mipmap_uncompressed

> +  char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> +  src += mt->offset;
> +

It seems possible that the buffer object had a WC memory type during
rendering. In that case, we need an sfence here right?

This stuff is pretty new to me, so perhaps others would like to chime
in.

> +  tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
> +  mt->surf.row_pitch, brw->has_swizzling, 
> mt->surf.tiling,
> +  memcpy);
> +
> +  intel_miptree_unmap_raw(mt);
> +   }
> +}
> +
> +static void
> +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
> + struct intel_mipmap_tree *mt,
> + struct intel_miptree_map *map,
> + unsigned int level,
> + unsigned int slice)
> +{
> +   if (map->mode & GL_MAP_WRITE_BIT) {
> +  unsigned int x1, x2, y1, y2;
> +  tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
> +
> +  char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> +  dst

[Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping

2018-03-12 Thread Scott D Phillips
Rename the (un)map_gtt functions to (un)map_map (map by
returning a map) and add new functions (un)map_tiled_memcpy that
return a shadow buffer populated with the intel_tiled_memcpy
functions.

Tiling/detiling with the cpu will be the only way to handle Yf/Ys
tiling, when support is added for those formats.

v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson)

v3: Add units to parameter names of tile_extents (Nanley Chery)
Use _mesa_align_malloc for the shadow copy (Nanley)
Continue using gtt maps on gen4 (Nanley)
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 ---
 1 file changed, 86 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index c6213b21629..fba17bf5b7b 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -31,6 +31,7 @@
 #include "intel_image.h"
 #include "intel_mipmap_tree.h"
 #include "intel_tex.h"
+#include "intel_tiled_memcpy.h"
 #include "intel_blit.h"
 #include "intel_fbo.h"
 
@@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
 }
 
 static void
-intel_miptree_map_gtt(struct brw_context *brw,
- struct intel_mipmap_tree *mt,
- struct intel_miptree_map *map,
- unsigned int level, unsigned int slice)
+intel_miptree_map_map(struct brw_context *brw,
+  struct intel_mipmap_tree *mt,
+  struct intel_miptree_map *map,
+  unsigned int level, unsigned int slice)
 {
unsigned int bw, bh;
void *base;
@@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw,
 }
 
 static void
-intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
+intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
 {
intel_miptree_unmap_raw(mt);
 }
 
+/* Compute extent parameters for use with tiled_memcpy functions.
+ * xs are in units of bytes and ys are in units of strides. */
+static inline void
+tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
+ unsigned int level, unsigned int slice, unsigned int *x1_B,
+ unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
+{
+   unsigned int block_width, block_height;
+   unsigned int x0_el, y0_el;
+
+   _mesa_get_format_block_size(mt->format, &block_width, &block_height);
+
+   assert(map->x % block_width == 0);
+   assert(map->y % block_height == 0);
+
+   intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
+   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
+   *y1_el = map->y / block_height + y0_el;
+   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
+   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
+}
+
+static void
+intel_miptree_map_tiled_memcpy(struct brw_context *brw,
+   struct intel_mipmap_tree *mt,
+   struct intel_miptree_map *map,
+   unsigned int level, unsigned int slice)
+{
+   unsigned int x1, x2, y1, y2;
+   tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
+   map->stride = _mesa_format_row_stride(mt->format, map->w);
+   map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), 16);
+
+   assert(map->ptr);
+
+   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
+  char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
+  src += mt->offset;
+
+  tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
+  mt->surf.row_pitch, brw->has_swizzling, mt->surf.tiling,
+  memcpy);
+
+  intel_miptree_unmap_raw(mt);
+   }
+}
+
+static void
+intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ struct intel_miptree_map *map,
+ unsigned int level,
+ unsigned int slice)
+{
+   if (map->mode & GL_MAP_WRITE_BIT) {
+  unsigned int x1, x2, y1, y2;
+  tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
+
+  char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
+  dst += mt->offset;
+
+  linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch,
+  map->stride, brw->has_swizzling, mt->surf.tiling, 
memcpy);
+
+  intel_miptree_unmap_raw(mt);
+   }
+   _mesa_align_free(map->buffer);
+   map->buffer = map->ptr = NULL;
+}
+
 static void
 intel_miptree_map_blit(struct brw_context *brw,
   struct intel_mipmap_tree *mt,
@@ -3655,8 +3726,10 @@ intel_miptree_map(struct brw_context *brw,
   (mt->surf.row_pitch % 16 == 0)) {
   intel_miptree_map_movntdqa(brw, mt, map, level, slice);
 #endif
+   } else if (mt->surf.tiling != ISL_TILING_LINEAR && brw->screen->devinfo.gen 
> 4) {
+  i