Re: [Mesa-dev] [PATCH 2/5] i965/miptree: Use cpu tiling/detiling when mapping

2018-03-06 Thread Nanley Chery
On Tue, Mar 06, 2018 at 08:01:47AM -0800, Scott D Phillips wrote:
> Jason Ekstrand  writes:
> 
> > On Mon, Mar 5, 2018 at 11:39 AM, Nanley Chery  wrote:
> >
> >> On Tue, Jan 09, 2018 at 11:16:59PM -0800, Scott D Phillips wrote:
> >> > Rename the (un)map_gtt functions to (un)map_map (map by
> >> > returning a map) and add new functions (un)map_tiled_memcpy that
> >> > return a shadow buffer populated with the intel_tiled_memcpy
> >> > functions.
> >>
> >> Could you mention some of the rationale?
> 
> One is that gtt maps cannot detile the newer Yf and Ys layouts. Not sure
> if Jason or Ken know other reasons. I think we suspected some
> performance difference, but I didn't really see any with well formed
> bulk uploads or downloads.
> 

The first reason is plenty in my opinion.

> >> > ---
> >> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 95
> >> ---
> >> >  1 file changed, 86 insertions(+), 9 deletions(-)
> >> >
> >> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> >> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> >> > index ead0c359c0..7a90dafa1e 100644
> >> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> >> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> >> > @@ -31,6 +31,7 @@
> >> >  #include "intel_image.h"
> >> >  #include "intel_mipmap_tree.h"
> >> >  #include "intel_tex.h"
> >> > +#include "intel_tiled_memcpy.h"
> >> >  #include "intel_blit.h"
> >> >  #include "intel_fbo.h"
> >> >
> >> > @@ -3031,10 +3032,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree
> >> *mt)
> >> >  }
> >> >
> >> >  static void
> >> > -intel_miptree_map_gtt(struct brw_context *brw,
> >> > -   struct intel_mipmap_tree *mt,
> >> > -   struct intel_miptree_map *map,
> >> > -   unsigned int level, unsigned int slice)
> >> > +intel_miptree_map_map(struct brw_context *brw,
> >> > +  struct intel_mipmap_tree *mt,
> >> > +  struct intel_miptree_map *map,
> >> > +  unsigned int level, unsigned int slice)
> >> >  {
> >> > unsigned int bw, bh;
> >> > void *base;
> >> > @@ -3052,7 +3053,7 @@ intel_miptree_map_gtt(struct brw_context *brw,
> >> > y /= bh;
> >> > x /= bw;
> >> >
> >> > -   base = intel_miptree_map_raw(brw, mt, map->mode);
> >> > +   base = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> >> >
> >> > if (base == NULL)
> >> >map->ptr = NULL;
> >> > @@ -3078,11 +3079,80 @@ intel_miptree_map_gtt(struct brw_context *brw,
> >> >  }
> >> >
> >> >  static void
> >> > -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
> >> > +intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
> >> >  {
> >> > intel_miptree_unmap_raw(mt);
> >> >  }
> >> >
> >> > +/* Compute extent parameters for use with tiled_memcpy functions.
> >> > + * xs are in units of bytes and ys are in units of strides. */
> >> > +static inline void
> >> > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map
> >> *map,
> >> > + unsigned int level, unsigned int slice, unsigned int *x1,
> >> > + unsigned int *x2, unsigned int *y1, unsigned int *y2)
> >>
> >> It would be nice to give these variables units:
> >> x1_B, y1_el, etc.
> 
> Sure, will do
> 
> >> > +{
> >> > +   unsigned int block_width, block_height, block_bytes;
> >> > +   unsigned int x0_el, y0_el;
> >> > +
> >> > +   _mesa_get_format_block_size(mt->format, _width,
> >> _height);
> >> > +   block_bytes = _mesa_get_format_bytes(mt->format);
> >>
> >> block_bytes == mt->cpp (in theory anyways)
> 
> Ok, will change to that
> 
> >> > +
> >> > +   assert(map->x % block_width == 0);
> >> > +   assert(map->y % block_height == 0);
> >> > +
> >> > +   intel_miptree_get_image_offset(mt, level, slice, _el, _el);
> >> > +   *x1 = (map->x / block_width + x0_el) * block_bytes;
> >> > +   *y1 = map->y / block_height + y0_el;
> >> > +   *x2 = *x1 + DIV_ROUND_UP(map->w, block_width) * block_bytes;
> >> > +   *y2 = *y1 + DIV_ROUND_UP(map->h, block_height);
> >> > +}
> >> > +
> >> > +static void
> >> > +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
> >> > +   struct intel_mipmap_tree *mt,
> >> > +   struct intel_miptree_map *map,
> >> > +   unsigned int level, unsigned int slice)
> >> > +{
> >> > +   unsigned int x1, x2, y1, y2;
> >> > +   tile_extents(mt, map, level, slice, , , , );
> >> > +   map->stride = _mesa_format_row_stride(mt->format, map->w);
> >> > +   map->buffer = map->ptr = malloc(map->stride * (y2 - y1));
> >>
> >> Using _mesa_align_malloc should improve memory accesses for our 128bit
> >> formats.
> >>
> >> We should probably also assert(map->ptr) or throw a GL_OUT_OF_MEMORY
> >> error if the alloc fails.
> 
> Good suggestions, will do that
> 
> >> > +
> >> > +   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
> >> > +  

Re: [Mesa-dev] [PATCH 2/5] i965/miptree: Use cpu tiling/detiling when mapping

2018-03-06 Thread Scott D Phillips
Jason Ekstrand  writes:

> On Mon, Mar 5, 2018 at 11:39 AM, Nanley Chery  wrote:
>
>> On Tue, Jan 09, 2018 at 11:16:59PM -0800, Scott D Phillips wrote:
>> > Rename the (un)map_gtt functions to (un)map_map (map by
>> > returning a map) and add new functions (un)map_tiled_memcpy that
>> > return a shadow buffer populated with the intel_tiled_memcpy
>> > functions.
>>
>> Could you mention some of the rationale?

One is that gtt maps cannot detile the newer Yf and Ys layouts. Not sure
if Jason or Ken know other reasons. I think we suspected some
performance difference, but I didn't really see any with well formed
bulk uploads or downloads.

>> > ---
>> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 95
>> ---
>> >  1 file changed, 86 insertions(+), 9 deletions(-)
>> >
>> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> > index ead0c359c0..7a90dafa1e 100644
>> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> > @@ -31,6 +31,7 @@
>> >  #include "intel_image.h"
>> >  #include "intel_mipmap_tree.h"
>> >  #include "intel_tex.h"
>> > +#include "intel_tiled_memcpy.h"
>> >  #include "intel_blit.h"
>> >  #include "intel_fbo.h"
>> >
>> > @@ -3031,10 +3032,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree
>> *mt)
>> >  }
>> >
>> >  static void
>> > -intel_miptree_map_gtt(struct brw_context *brw,
>> > -   struct intel_mipmap_tree *mt,
>> > -   struct intel_miptree_map *map,
>> > -   unsigned int level, unsigned int slice)
>> > +intel_miptree_map_map(struct brw_context *brw,
>> > +  struct intel_mipmap_tree *mt,
>> > +  struct intel_miptree_map *map,
>> > +  unsigned int level, unsigned int slice)
>> >  {
>> > unsigned int bw, bh;
>> > void *base;
>> > @@ -3052,7 +3053,7 @@ intel_miptree_map_gtt(struct brw_context *brw,
>> > y /= bh;
>> > x /= bw;
>> >
>> > -   base = intel_miptree_map_raw(brw, mt, map->mode);
>> > +   base = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
>> >
>> > if (base == NULL)
>> >map->ptr = NULL;
>> > @@ -3078,11 +3079,80 @@ intel_miptree_map_gtt(struct brw_context *brw,
>> >  }
>> >
>> >  static void
>> > -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
>> > +intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
>> >  {
>> > intel_miptree_unmap_raw(mt);
>> >  }
>> >
>> > +/* Compute extent parameters for use with tiled_memcpy functions.
>> > + * xs are in units of bytes and ys are in units of strides. */
>> > +static inline void
>> > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map
>> *map,
>> > + unsigned int level, unsigned int slice, unsigned int *x1,
>> > + unsigned int *x2, unsigned int *y1, unsigned int *y2)
>>
>> It would be nice to give these variables units:
>> x1_B, y1_el, etc.

Sure, will do

>> > +{
>> > +   unsigned int block_width, block_height, block_bytes;
>> > +   unsigned int x0_el, y0_el;
>> > +
>> > +   _mesa_get_format_block_size(mt->format, _width,
>> _height);
>> > +   block_bytes = _mesa_get_format_bytes(mt->format);
>>
>> block_bytes == mt->cpp (in theory anyways)

Ok, will change to that

>> > +
>> > +   assert(map->x % block_width == 0);
>> > +   assert(map->y % block_height == 0);
>> > +
>> > +   intel_miptree_get_image_offset(mt, level, slice, _el, _el);
>> > +   *x1 = (map->x / block_width + x0_el) * block_bytes;
>> > +   *y1 = map->y / block_height + y0_el;
>> > +   *x2 = *x1 + DIV_ROUND_UP(map->w, block_width) * block_bytes;
>> > +   *y2 = *y1 + DIV_ROUND_UP(map->h, block_height);
>> > +}
>> > +
>> > +static void
>> > +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
>> > +   struct intel_mipmap_tree *mt,
>> > +   struct intel_miptree_map *map,
>> > +   unsigned int level, unsigned int slice)
>> > +{
>> > +   unsigned int x1, x2, y1, y2;
>> > +   tile_extents(mt, map, level, slice, , , , );
>> > +   map->stride = _mesa_format_row_stride(mt->format, map->w);
>> > +   map->buffer = map->ptr = malloc(map->stride * (y2 - y1));
>>
>> Using _mesa_align_malloc should improve memory accesses for our 128bit
>> formats.
>>
>> We should probably also assert(map->ptr) or throw a GL_OUT_OF_MEMORY
>> error if the alloc fails.

Good suggestions, will do that

>> > +
>> > +   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
>> > +  char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
>> > +  src += mt->offset;
>> > +
>> > +  tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
>> > +  mt->surf.row_pitch, brw->has_swizzling,
>> mt->surf.tiling,
>> > +  memcpy);
>> > +
>> > +  intel_miptree_unmap_raw(mt);
>> > +   }
>> > +}

Re: [Mesa-dev] [PATCH 2/5] i965/miptree: Use cpu tiling/detiling when mapping

2018-03-05 Thread Jason Ekstrand
On Mon, Mar 5, 2018 at 11:39 AM, Nanley Chery  wrote:

> On Tue, Jan 09, 2018 at 11:16:59PM -0800, Scott D Phillips wrote:
> > Rename the (un)map_gtt functions to (un)map_map (map by
> > returning a map) and add new functions (un)map_tiled_memcpy that
> > return a shadow buffer populated with the intel_tiled_memcpy
> > functions.
>
> Could you mention some of the rationale?
>
> > ---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 95
> ---
> >  1 file changed, 86 insertions(+), 9 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index ead0c359c0..7a90dafa1e 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -31,6 +31,7 @@
> >  #include "intel_image.h"
> >  #include "intel_mipmap_tree.h"
> >  #include "intel_tex.h"
> > +#include "intel_tiled_memcpy.h"
> >  #include "intel_blit.h"
> >  #include "intel_fbo.h"
> >
> > @@ -3031,10 +3032,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree
> *mt)
> >  }
> >
> >  static void
> > -intel_miptree_map_gtt(struct brw_context *brw,
> > -   struct intel_mipmap_tree *mt,
> > -   struct intel_miptree_map *map,
> > -   unsigned int level, unsigned int slice)
> > +intel_miptree_map_map(struct brw_context *brw,
> > +  struct intel_mipmap_tree *mt,
> > +  struct intel_miptree_map *map,
> > +  unsigned int level, unsigned int slice)
> >  {
> > unsigned int bw, bh;
> > void *base;
> > @@ -3052,7 +3053,7 @@ intel_miptree_map_gtt(struct brw_context *brw,
> > y /= bh;
> > x /= bw;
> >
> > -   base = intel_miptree_map_raw(brw, mt, map->mode);
> > +   base = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> >
> > if (base == NULL)
> >map->ptr = NULL;
> > @@ -3078,11 +3079,80 @@ intel_miptree_map_gtt(struct brw_context *brw,
> >  }
> >
> >  static void
> > -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
> > +intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
> >  {
> > intel_miptree_unmap_raw(mt);
> >  }
> >
> > +/* Compute extent parameters for use with tiled_memcpy functions.
> > + * xs are in units of bytes and ys are in units of strides. */
> > +static inline void
> > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map
> *map,
> > + unsigned int level, unsigned int slice, unsigned int *x1,
> > + unsigned int *x2, unsigned int *y1, unsigned int *y2)
>
> It would be nice to give these variables units:
> x1_B, y1_el, etc.
>
> > +{
> > +   unsigned int block_width, block_height, block_bytes;
> > +   unsigned int x0_el, y0_el;
> > +
> > +   _mesa_get_format_block_size(mt->format, _width,
> _height);
> > +   block_bytes = _mesa_get_format_bytes(mt->format);
>
> block_bytes == mt->cpp (in theory anyways)
>
> > +
> > +   assert(map->x % block_width == 0);
> > +   assert(map->y % block_height == 0);
> > +
> > +   intel_miptree_get_image_offset(mt, level, slice, _el, _el);
> > +   *x1 = (map->x / block_width + x0_el) * block_bytes;
> > +   *y1 = map->y / block_height + y0_el;
> > +   *x2 = *x1 + DIV_ROUND_UP(map->w, block_width) * block_bytes;
> > +   *y2 = *y1 + DIV_ROUND_UP(map->h, block_height);
> > +}
> > +
> > +static void
> > +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
> > +   struct intel_mipmap_tree *mt,
> > +   struct intel_miptree_map *map,
> > +   unsigned int level, unsigned int slice)
> > +{
> > +   unsigned int x1, x2, y1, y2;
> > +   tile_extents(mt, map, level, slice, , , , );
> > +   map->stride = _mesa_format_row_stride(mt->format, map->w);
> > +   map->buffer = map->ptr = malloc(map->stride * (y2 - y1));
>
> Using _mesa_align_malloc should improve memory accesses for our 128bit
> formats.
>
> We should probably also assert(map->ptr) or throw a GL_OUT_OF_MEMORY
> error if the alloc fails.
>
> > +
> > +   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
> > +  char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> > +  src += mt->offset;
> > +
> > +  tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
> > +  mt->surf.row_pitch, brw->has_swizzling,
> mt->surf.tiling,
> > +  memcpy);
> > +
> > +  intel_miptree_unmap_raw(mt);
> > +   }
> > +}
> > +
> > +static void
> > +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
> > + struct intel_mipmap_tree *mt,
> > + struct intel_miptree_map *map,
> > + unsigned int level,
> > + unsigned int slice)
> > +{
> > +   if (map->mode & GL_MAP_WRITE_BIT) {
> > +  unsigned int x1, x2, y1, y2;
> > +  tile_extents(mt, map, level, 

Re: [Mesa-dev] [PATCH 2/5] i965/miptree: Use cpu tiling/detiling when mapping

2018-03-05 Thread Nanley Chery
On Tue, Jan 09, 2018 at 11:16:59PM -0800, Scott D Phillips wrote:
> Rename the (un)map_gtt functions to (un)map_map (map by
> returning a map) and add new functions (un)map_tiled_memcpy that
> return a shadow buffer populated with the intel_tiled_memcpy
> functions.

Could you mention some of the rationale?

> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 95 
> ---
>  1 file changed, 86 insertions(+), 9 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index ead0c359c0..7a90dafa1e 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -31,6 +31,7 @@
>  #include "intel_image.h"
>  #include "intel_mipmap_tree.h"
>  #include "intel_tex.h"
> +#include "intel_tiled_memcpy.h"
>  #include "intel_blit.h"
>  #include "intel_fbo.h"
>  
> @@ -3031,10 +3032,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
>  }
>  
>  static void
> -intel_miptree_map_gtt(struct brw_context *brw,
> -   struct intel_mipmap_tree *mt,
> -   struct intel_miptree_map *map,
> -   unsigned int level, unsigned int slice)
> +intel_miptree_map_map(struct brw_context *brw,
> +  struct intel_mipmap_tree *mt,
> +  struct intel_miptree_map *map,
> +  unsigned int level, unsigned int slice)
>  {
> unsigned int bw, bh;
> void *base;
> @@ -3052,7 +3053,7 @@ intel_miptree_map_gtt(struct brw_context *brw,
> y /= bh;
> x /= bw;
>  
> -   base = intel_miptree_map_raw(brw, mt, map->mode);
> +   base = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
>  
> if (base == NULL)
>map->ptr = NULL;
> @@ -3078,11 +3079,80 @@ intel_miptree_map_gtt(struct brw_context *brw,
>  }
>  
>  static void
> -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
> +intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
>  {
> intel_miptree_unmap_raw(mt);
>  }
>  
> +/* Compute extent parameters for use with tiled_memcpy functions.
> + * xs are in units of bytes and ys are in units of strides. */
> +static inline void
> +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
> + unsigned int level, unsigned int slice, unsigned int *x1,
> + unsigned int *x2, unsigned int *y1, unsigned int *y2)

It would be nice to give these variables units:
x1_B, y1_el, etc.

> +{
> +   unsigned int block_width, block_height, block_bytes;
> +   unsigned int x0_el, y0_el;
> +
> +   _mesa_get_format_block_size(mt->format, _width, _height);
> +   block_bytes = _mesa_get_format_bytes(mt->format);

block_bytes == mt->cpp (in theory anyways)

> +
> +   assert(map->x % block_width == 0);
> +   assert(map->y % block_height == 0);
> +
> +   intel_miptree_get_image_offset(mt, level, slice, _el, _el);
> +   *x1 = (map->x / block_width + x0_el) * block_bytes;
> +   *y1 = map->y / block_height + y0_el;
> +   *x2 = *x1 + DIV_ROUND_UP(map->w, block_width) * block_bytes;
> +   *y2 = *y1 + DIV_ROUND_UP(map->h, block_height);
> +}
> +
> +static void
> +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
> +   struct intel_mipmap_tree *mt,
> +   struct intel_miptree_map *map,
> +   unsigned int level, unsigned int slice)
> +{
> +   unsigned int x1, x2, y1, y2;
> +   tile_extents(mt, map, level, slice, , , , );
> +   map->stride = _mesa_format_row_stride(mt->format, map->w);
> +   map->buffer = map->ptr = malloc(map->stride * (y2 - y1));

Using _mesa_align_malloc should improve memory accesses for our 128bit formats.

We should probably also assert(map->ptr) or throw a GL_OUT_OF_MEMORY
error if the alloc fails.

> +
> +   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
> +  char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> +  src += mt->offset;
> +
> +  tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
> +  mt->surf.row_pitch, brw->has_swizzling, 
> mt->surf.tiling,
> +  memcpy);
> +
> +  intel_miptree_unmap_raw(mt);
> +   }
> +}
> +
> +static void
> +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
> + struct intel_mipmap_tree *mt,
> + struct intel_miptree_map *map,
> + unsigned int level,
> + unsigned int slice)
> +{
> +   if (map->mode & GL_MAP_WRITE_BIT) {
> +  unsigned int x1, x2, y1, y2;
> +  tile_extents(mt, map, level, slice, , , , );
> +
> +  char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> +  dst += mt->offset;
> +
> +  linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch,
> +  map->stride, brw->has_swizzling, mt->surf.tiling, 
> memcpy);
> +
> +  

Re: [Mesa-dev] [PATCH 2/5] i965/miptree: Use cpu tiling/detiling when mapping

2018-01-23 Thread Chris Wilson
Quoting Scott D Phillips (2018-01-10 07:16:59)
> +/* Compute extent parameters for use with tiled_memcpy functions.
> + * xs are in units of bytes and ys are in units of strides. */
> +static inline void
> +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
> + unsigned int level, unsigned int slice, unsigned int *x1,
> + unsigned int *x2, unsigned int *y1, unsigned int *y2)
> +{
> +   unsigned int block_width, block_height, block_bytes;
> +   unsigned int x0_el, y0_el;
> +
> +   _mesa_get_format_block_size(mt->format, _width, _height);
> +   block_bytes = _mesa_get_format_bytes(mt->format);
> +
> +   assert(map->x % block_width == 0);
> +   assert(map->y % block_height == 0);
> +
> +   intel_miptree_get_image_offset(mt, level, slice, _el, _el);
> +   *x1 = (map->x / block_width + x0_el) * block_bytes;
> +   *y1 = map->y / block_height + y0_el;
> +   *x2 = *x1 + DIV_ROUND_UP(map->w, block_width) * block_bytes;
> +   *y2 = *y1 + DIV_ROUND_UP(map->h, block_height);
*x2 = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * block_bytes;
*y2 = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;

Otherwise you may underestimate the aligned block extents. Consider the
case where map->w == block_width, but map->x is misaligned. x1 is then
truncated to the start of the block, and x2 is moved to the end of that
block, but to the left of map->x + map->w.
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] i965/miptree: Use cpu tiling/detiling when mapping

2018-01-09 Thread Scott D Phillips
Rename the (un)map_gtt functions to (un)map_map (map by
returning a map) and add new functions (un)map_tiled_memcpy that
return a shadow buffer populated with the intel_tiled_memcpy
functions.
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 95 ---
 1 file changed, 86 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index ead0c359c0..7a90dafa1e 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -31,6 +31,7 @@
 #include "intel_image.h"
 #include "intel_mipmap_tree.h"
 #include "intel_tex.h"
+#include "intel_tiled_memcpy.h"
 #include "intel_blit.h"
 #include "intel_fbo.h"
 
@@ -3031,10 +3032,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
 }
 
 static void
-intel_miptree_map_gtt(struct brw_context *brw,
- struct intel_mipmap_tree *mt,
- struct intel_miptree_map *map,
- unsigned int level, unsigned int slice)
+intel_miptree_map_map(struct brw_context *brw,
+  struct intel_mipmap_tree *mt,
+  struct intel_miptree_map *map,
+  unsigned int level, unsigned int slice)
 {
unsigned int bw, bh;
void *base;
@@ -3052,7 +3053,7 @@ intel_miptree_map_gtt(struct brw_context *brw,
y /= bh;
x /= bw;
 
-   base = intel_miptree_map_raw(brw, mt, map->mode);
+   base = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
 
if (base == NULL)
   map->ptr = NULL;
@@ -3078,11 +3079,80 @@ intel_miptree_map_gtt(struct brw_context *brw,
 }
 
 static void
-intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
+intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
 {
intel_miptree_unmap_raw(mt);
 }
 
+/* Compute extent parameters for use with tiled_memcpy functions.
+ * xs are in units of bytes and ys are in units of strides. */
+static inline void
+tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
+ unsigned int level, unsigned int slice, unsigned int *x1,
+ unsigned int *x2, unsigned int *y1, unsigned int *y2)
+{
+   unsigned int block_width, block_height, block_bytes;
+   unsigned int x0_el, y0_el;
+
+   _mesa_get_format_block_size(mt->format, _width, _height);
+   block_bytes = _mesa_get_format_bytes(mt->format);
+
+   assert(map->x % block_width == 0);
+   assert(map->y % block_height == 0);
+
+   intel_miptree_get_image_offset(mt, level, slice, _el, _el);
+   *x1 = (map->x / block_width + x0_el) * block_bytes;
+   *y1 = map->y / block_height + y0_el;
+   *x2 = *x1 + DIV_ROUND_UP(map->w, block_width) * block_bytes;
+   *y2 = *y1 + DIV_ROUND_UP(map->h, block_height);
+}
+
+static void
+intel_miptree_map_tiled_memcpy(struct brw_context *brw,
+   struct intel_mipmap_tree *mt,
+   struct intel_miptree_map *map,
+   unsigned int level, unsigned int slice)
+{
+   unsigned int x1, x2, y1, y2;
+   tile_extents(mt, map, level, slice, , , , );
+   map->stride = _mesa_format_row_stride(mt->format, map->w);
+   map->buffer = map->ptr = malloc(map->stride * (y2 - y1));
+
+   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
+  char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
+  src += mt->offset;
+
+  tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
+  mt->surf.row_pitch, brw->has_swizzling, mt->surf.tiling,
+  memcpy);
+
+  intel_miptree_unmap_raw(mt);
+   }
+}
+
+static void
+intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ struct intel_miptree_map *map,
+ unsigned int level,
+ unsigned int slice)
+{
+   if (map->mode & GL_MAP_WRITE_BIT) {
+  unsigned int x1, x2, y1, y2;
+  tile_extents(mt, map, level, slice, , , , );
+
+  char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
+  dst += mt->offset;
+
+  linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch,
+  map->stride, brw->has_swizzling, mt->surf.tiling, 
memcpy);
+
+  intel_miptree_unmap_raw(mt);
+   }
+   free(map->buffer);
+   map->buffer = map->ptr = NULL;
+}
+
 static void
 intel_miptree_map_blit(struct brw_context *brw,
   struct intel_mipmap_tree *mt,
@@ -3640,8 +3710,10 @@ intel_miptree_map(struct brw_context *brw,
   (mt->surf.row_pitch % 16 == 0)) {
   intel_miptree_map_movntdqa(brw, mt, map, level, slice);
 #endif
+   } else if (mt->surf.tiling != ISL_TILING_LINEAR) {
+  intel_miptree_map_tiled_memcpy(brw, mt, map, level, slice);
} else {
-  intel_miptree_map_gtt(brw, mt, map, level, slice);
+  intel_miptree_map_map(brw, mt, map, level, slice);