Re: [Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping
On Wed, Mar 14, 2018 at 05:18:58PM +, Chris Wilson wrote: > Quoting Nanley Chery (2018-03-14 17:14:15) > > On Mon, Mar 12, 2018 at 10:52:55AM -0700, Scott D Phillips wrote: > > > Rename the (un)map_gtt functions to (un)map_map (map by > > > returning a map) and add new functions (un)map_tiled_memcpy that > > > return a shadow buffer populated with the intel_tiled_memcpy > > > functions. > > > > > > Tiling/detiling with the cpu will be the only way to handle Yf/Ys > > > tiling, when support is added for those formats. > > > > > > v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson) > > > > > > v3: Add units to parameter names of tile_extents (Nanley Chery) > > > Use _mesa_align_malloc for the shadow copy (Nanley) > > > Continue using gtt maps on gen4 (Nanley) > > > --- > > > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 > > > --- > > > 1 file changed, 86 insertions(+), 8 deletions(-) > > > > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > > index c6213b21629..fba17bf5b7b 100644 > > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > > @@ -31,6 +31,7 @@ > > > #include "intel_image.h" > > > #include "intel_mipmap_tree.h" > > > #include "intel_tex.h" > > > +#include "intel_tiled_memcpy.h" > > > #include "intel_blit.h" > > > #include "intel_fbo.h" > > > > > > @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree > > > *mt) > > > } > > > > > > static void > > > -intel_miptree_map_gtt(struct brw_context *brw, > > > - struct intel_mipmap_tree *mt, > > > - struct intel_miptree_map *map, > > > - unsigned int level, unsigned int slice) > > > +intel_miptree_map_map(struct brw_context *brw, > > > + struct intel_mipmap_tree *mt, > > > + struct intel_miptree_map *map, > > > + unsigned int level, unsigned int slice) > > > { > > > unsigned int bw, bh; > > > void *base; > > > @@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw, > > > } > > > > > > static void > > > -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt) > > > +intel_miptree_unmap_map(struct intel_mipmap_tree *mt) > > > { > > > intel_miptree_unmap_raw(mt); > > > } > > > > > > +/* Compute extent parameters for use with tiled_memcpy functions. > > > + * xs are in units of bytes and ys are in units of strides. */ > > > +static inline void > > > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map, > > > + unsigned int level, unsigned int slice, unsigned int *x1_B, > > > + unsigned int *x2_B, unsigned int *y1_el, unsigned int > > > *y2_el) > > > +{ > > > + unsigned int block_width, block_height; > > > + unsigned int x0_el, y0_el; > > > + > > > + _mesa_get_format_block_size(mt->format, &block_width, &block_height); > > > + > > > + assert(map->x % block_width == 0); > > > + assert(map->y % block_height == 0); > > > + > > > + intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el); > > > + *x1_B = (map->x / block_width + x0_el) * mt->cpp; > > > + *y1_el = map->y / block_height + y0_el; > > > + *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * > > > mt->cpp; > > > + *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el; > > > +} > > > + > > > +static void > > > +intel_miptree_map_tiled_memcpy(struct brw_context *brw, > > > + struct intel_mipmap_tree *mt, > > > + struct intel_miptree_map *map, > > > + unsigned int level, unsigned int slice) > > > +{ > > > + unsigned int x1, x2, y1, y2; > > > + tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); > > > + map->stride = _mesa_format_row_stride(mt->format, map->w); > > > + map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), > > > 16); > > > + > > > + assert(map->ptr); > > > + > > > + if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { > > > > It looks like we'll generate extra copies using this function, but only > > in a few corner cases. I think the following places should be using the > > INVALIDATE flag, but aren't: > > * _mesa_store_cleartexsubimage > > * generate_mipmap_uncompressed > > > > > + char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); > > > + src += mt->offset; > > > + > > > > It seems possible that the buffer object had a WC memory type during > > rendering. In that case, we need an sfence here right? > > > > This stuff is pretty new to me, so perhaps others would like to chime > > in. > > From talking to Ben on IRC and through my reading of the following section in the HW docs: Memory Types and Applicability to GFX, it seems that WC from GFX perspecitive is basically UC and doesn't use WC buffers: Write Combining
Re: [Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping
Quoting Nanley Chery (2018-03-14 17:14:15) > On Mon, Mar 12, 2018 at 10:52:55AM -0700, Scott D Phillips wrote: > > Rename the (un)map_gtt functions to (un)map_map (map by > > returning a map) and add new functions (un)map_tiled_memcpy that > > return a shadow buffer populated with the intel_tiled_memcpy > > functions. > > > > Tiling/detiling with the cpu will be the only way to handle Yf/Ys > > tiling, when support is added for those formats. > > > > v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson) > > > > v3: Add units to parameter names of tile_extents (Nanley Chery) > > Use _mesa_align_malloc for the shadow copy (Nanley) > > Continue using gtt maps on gen4 (Nanley) > > --- > > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 > > --- > > 1 file changed, 86 insertions(+), 8 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > index c6213b21629..fba17bf5b7b 100644 > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > @@ -31,6 +31,7 @@ > > #include "intel_image.h" > > #include "intel_mipmap_tree.h" > > #include "intel_tex.h" > > +#include "intel_tiled_memcpy.h" > > #include "intel_blit.h" > > #include "intel_fbo.h" > > > > @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree > > *mt) > > } > > > > static void > > -intel_miptree_map_gtt(struct brw_context *brw, > > - struct intel_mipmap_tree *mt, > > - struct intel_miptree_map *map, > > - unsigned int level, unsigned int slice) > > +intel_miptree_map_map(struct brw_context *brw, > > + struct intel_mipmap_tree *mt, > > + struct intel_miptree_map *map, > > + unsigned int level, unsigned int slice) > > { > > unsigned int bw, bh; > > void *base; > > @@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw, > > } > > > > static void > > -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt) > > +intel_miptree_unmap_map(struct intel_mipmap_tree *mt) > > { > > intel_miptree_unmap_raw(mt); > > } > > > > +/* Compute extent parameters for use with tiled_memcpy functions. > > + * xs are in units of bytes and ys are in units of strides. */ > > +static inline void > > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map, > > + unsigned int level, unsigned int slice, unsigned int *x1_B, > > + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el) > > +{ > > + unsigned int block_width, block_height; > > + unsigned int x0_el, y0_el; > > + > > + _mesa_get_format_block_size(mt->format, &block_width, &block_height); > > + > > + assert(map->x % block_width == 0); > > + assert(map->y % block_height == 0); > > + > > + intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el); > > + *x1_B = (map->x / block_width + x0_el) * mt->cpp; > > + *y1_el = map->y / block_height + y0_el; > > + *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp; > > + *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el; > > +} > > + > > +static void > > +intel_miptree_map_tiled_memcpy(struct brw_context *brw, > > + struct intel_mipmap_tree *mt, > > + struct intel_miptree_map *map, > > + unsigned int level, unsigned int slice) > > +{ > > + unsigned int x1, x2, y1, y2; > > + tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); > > + map->stride = _mesa_format_row_stride(mt->format, map->w); > > + map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), > > 16); > > + > > + assert(map->ptr); > > + > > + if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { > > It looks like we'll generate extra copies using this function, but only > in a few corner cases. I think the following places should be using the > INVALIDATE flag, but aren't: > * _mesa_store_cleartexsubimage > * generate_mipmap_uncompressed > > > + char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); > > + src += mt->offset; > > + > > It seems possible that the buffer object had a WC memory type during > rendering. In that case, we need an sfence here right? > > This stuff is pretty new to me, so perhaps others would like to chime > in. > > > + tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride, > > + mt->surf.row_pitch, brw->has_swizzling, > > mt->surf.tiling, > > + memcpy); > > + > > + intel_miptree_unmap_raw(mt); > > + } > > +} > > + > > +static void > > +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw, > > + struct intel_mipmap_tree *mt, > > + struct intel_miptree_map *map, > > + unsigned int lev
Re: [Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping
On Mon, Mar 12, 2018 at 10:52:55AM -0700, Scott D Phillips wrote: > Rename the (un)map_gtt functions to (un)map_map (map by > returning a map) and add new functions (un)map_tiled_memcpy that > return a shadow buffer populated with the intel_tiled_memcpy > functions. > > Tiling/detiling with the cpu will be the only way to handle Yf/Ys > tiling, when support is added for those formats. > > v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson) > > v3: Add units to parameter names of tile_extents (Nanley Chery) > Use _mesa_align_malloc for the shadow copy (Nanley) > Continue using gtt maps on gen4 (Nanley) > --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 > --- > 1 file changed, 86 insertions(+), 8 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index c6213b21629..fba17bf5b7b 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -31,6 +31,7 @@ > #include "intel_image.h" > #include "intel_mipmap_tree.h" > #include "intel_tex.h" > +#include "intel_tiled_memcpy.h" > #include "intel_blit.h" > #include "intel_fbo.h" > > @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt) > } > > static void > -intel_miptree_map_gtt(struct brw_context *brw, > - struct intel_mipmap_tree *mt, > - struct intel_miptree_map *map, > - unsigned int level, unsigned int slice) > +intel_miptree_map_map(struct brw_context *brw, > + struct intel_mipmap_tree *mt, > + struct intel_miptree_map *map, > + unsigned int level, unsigned int slice) > { > unsigned int bw, bh; > void *base; > @@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw, > } > > static void > -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt) > +intel_miptree_unmap_map(struct intel_mipmap_tree *mt) > { > intel_miptree_unmap_raw(mt); > } > > +/* Compute extent parameters for use with tiled_memcpy functions. > + * xs are in units of bytes and ys are in units of strides. */ > +static inline void > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map, > + unsigned int level, unsigned int slice, unsigned int *x1_B, > + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el) > +{ > + unsigned int block_width, block_height; > + unsigned int x0_el, y0_el; > + > + _mesa_get_format_block_size(mt->format, &block_width, &block_height); > + > + assert(map->x % block_width == 0); > + assert(map->y % block_height == 0); > + > + intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el); > + *x1_B = (map->x / block_width + x0_el) * mt->cpp; > + *y1_el = map->y / block_height + y0_el; > + *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp; > + *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el; > +} > + > +static void > +intel_miptree_map_tiled_memcpy(struct brw_context *brw, > + struct intel_mipmap_tree *mt, > + struct intel_miptree_map *map, > + unsigned int level, unsigned int slice) > +{ > + unsigned int x1, x2, y1, y2; > + tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); > + map->stride = _mesa_format_row_stride(mt->format, map->w); > + map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), 16); > + > + assert(map->ptr); > + > + if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { It looks like we'll generate extra copies using this function, but only in a few corner cases. I think the following places should be using the INVALIDATE flag, but aren't: * _mesa_store_cleartexsubimage * generate_mipmap_uncompressed > + char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); > + src += mt->offset; > + It seems possible that the buffer object had a WC memory type during rendering. In that case, we need an sfence here right? This stuff is pretty new to me, so perhaps others would like to chime in. > + tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride, > + mt->surf.row_pitch, brw->has_swizzling, > mt->surf.tiling, > + memcpy); > + > + intel_miptree_unmap_raw(mt); > + } > +} > + > +static void > +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw, > + struct intel_mipmap_tree *mt, > + struct intel_miptree_map *map, > + unsigned int level, > + unsigned int slice) > +{ > + if (map->mode & GL_MAP_WRITE_BIT) { > + unsigned int x1, x2, y1, y2; > + tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); > + > + char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); > + dst
[Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping
Rename the (un)map_gtt functions to (un)map_map (map by returning a map) and add new functions (un)map_tiled_memcpy that return a shadow buffer populated with the intel_tiled_memcpy functions. Tiling/detiling with the cpu will be the only way to handle Yf/Ys tiling, when support is added for those formats. v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson) v3: Add units to parameter names of tile_extents (Nanley Chery) Use _mesa_align_malloc for the shadow copy (Nanley) Continue using gtt maps on gen4 (Nanley) --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 --- 1 file changed, 86 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index c6213b21629..fba17bf5b7b 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -31,6 +31,7 @@ #include "intel_image.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" +#include "intel_tiled_memcpy.h" #include "intel_blit.h" #include "intel_fbo.h" @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt) } static void -intel_miptree_map_gtt(struct brw_context *brw, - struct intel_mipmap_tree *mt, - struct intel_miptree_map *map, - unsigned int level, unsigned int slice) +intel_miptree_map_map(struct brw_context *brw, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, unsigned int slice) { unsigned int bw, bh; void *base; @@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw, } static void -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt) +intel_miptree_unmap_map(struct intel_mipmap_tree *mt) { intel_miptree_unmap_raw(mt); } +/* Compute extent parameters for use with tiled_memcpy functions. + * xs are in units of bytes and ys are in units of strides. */ +static inline void +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map, + unsigned int level, unsigned int slice, unsigned int *x1_B, + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el) +{ + unsigned int block_width, block_height; + unsigned int x0_el, y0_el; + + _mesa_get_format_block_size(mt->format, &block_width, &block_height); + + assert(map->x % block_width == 0); + assert(map->y % block_height == 0); + + intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el); + *x1_B = (map->x / block_width + x0_el) * mt->cpp; + *y1_el = map->y / block_height + y0_el; + *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp; + *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el; +} + +static void +intel_miptree_map_tiled_memcpy(struct brw_context *brw, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, unsigned int slice) +{ + unsigned int x1, x2, y1, y2; + tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); + map->stride = _mesa_format_row_stride(mt->format, map->w); + map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), 16); + + assert(map->ptr); + + if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { + char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); + src += mt->offset; + + tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride, + mt->surf.row_pitch, brw->has_swizzling, mt->surf.tiling, + memcpy); + + intel_miptree_unmap_raw(mt); + } +} + +static void +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + if (map->mode & GL_MAP_WRITE_BIT) { + unsigned int x1, x2, y1, y2; + tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); + + char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); + dst += mt->offset; + + linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch, + map->stride, brw->has_swizzling, mt->surf.tiling, memcpy); + + intel_miptree_unmap_raw(mt); + } + _mesa_align_free(map->buffer); + map->buffer = map->ptr = NULL; +} + static void intel_miptree_map_blit(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -3655,8 +3726,10 @@ intel_miptree_map(struct brw_context *brw, (mt->surf.row_pitch % 16 == 0)) { intel_miptree_map_movntdqa(brw, mt, map, level, slice); #endif + } else if (mt->surf.tiling != ISL_TILING_LINEAR && brw->screen->devinfo.gen > 4) { + i