Re: [Mesa-dev] [PATCH v2 14/24] intel/blorp: Add a CCS ambiguation pass

2018-01-30 Thread Jason Ekstrand
On Tue, Jan 30, 2018 at 2:24 PM, Nanley Chery  wrote:

> On Fri, Jan 19, 2018 at 05:25:41PM -0800, Jason Ekstrand wrote:
> > On Fri, Jan 19, 2018 at 3:47 PM, Jason Ekstrand 
> > wrote:
> >
> > > This pass performs an "ambiguate" operation on a CCS-compressed surface
> > > by manually writing zeros into the CCS.  On gen8+, ISL gives us a
> fairly
> > > detailed notion of how the CCS is laid out so this is fairly simple to
> > > do.  On gen7, the CCS tiling is quite crazy but that isn't an issue
> > > because we can only do CCS on single-slice images so we can just blast
> > > over the entire CCS buffer if we want to.
> > > ---
> > >  src/intel/blorp/blorp.h   |   5 ++
> > >  src/intel/blorp/blorp_clear.c | 149 ++
> > > 
> > >  2 files changed, 154 insertions(+)
> > >
> > > diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> > > index a1dd571..478a9af 100644
> > > --- a/src/intel/blorp/blorp.h
> > > +++ b/src/intel/blorp/blorp.h
> > > @@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
> > >enum blorp_fast_clear_op resolve_op);
> > >
> > >  void
> > > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > > +struct blorp_surf *surf,
> > > +uint32_t level, uint32_t layer);
> > > +
> > > +void
> > >  blorp_mcs_partial_resolve(struct blorp_batch *batch,
> > >struct blorp_surf *surf,
> > >enum isl_format format,
> > > diff --git a/src/intel/blorp/blorp_clear.c
> b/src/intel/blorp/blorp_clear.c
> > > index 8e7bc9f..fa2abd9 100644
> > > --- a/src/intel/blorp/blorp_clear.c
> > > +++ b/src/intel/blorp/blorp_clear.c
> > > @@ -881,3 +881,152 @@ blorp_mcs_partial_resolve(struct blorp_batch
> *batch,
> > >
> > > batch->blorp->exec(batch, );
> > >  }
> > > +
> > > +/** Clear a CCS to the "uncompressed" state
> > > + *
> > > + * This pass is the CCS equivalent of a "HiZ resolve".  It sets the
> CCS
> > > values
> > > + * for a given layer/level of a surface to 0x0 which is the
> "uncompressed"
> > > + * state which tells the sampler to go look at the main surface.
> > > + */
> > > +void
> > > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > > +struct blorp_surf *surf,
> > > +uint32_t level, uint32_t layer)
> > > +{
> > > +   struct blorp_params params;
> > > +   blorp_params_init();
> > > +
> > > +   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
> > > +
> > > +   const struct isl_format_layout *aux_fmtl =
> > > +  isl_format_get_layout(surf->aux_surf->format);
> > > +   assert(aux_fmtl->txc == ISL_TXC_CCS);
> > > +
> > > +   params.dst = (struct brw_blorp_surface_info) {
> > > +  .enabled = true,
> > > +  .addr = surf->aux_addr,
> > > +  .view = {
> > > + .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
> > > + .format = ISL_FORMAT_R32G32B32A32_UINT,
> > > + .base_level = 0,
> > > + .base_array_layer = 0,
> > > + .levels = 1,
> > > + .array_len = 1,
> > > + .swizzle = ISL_SWIZZLE_IDENTITY,
> > > +  },
> > > +   };
> > > +
> > > +   uint32_t z = 0;
> > > +   if (surf->surf->dim == ISL_SURF_DIM_3D) {
> > > +  z = layer;
> > > +  layer = 0;
> > > +   }
> > > +
> > > +   uint32_t offset_B, x_offset_el, y_offset_el;
> > > +   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
> > > +_offset_el, _offset_el);
> > > +   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling,
> > > aux_fmtl->bpb,
> > > +  surf->aux_surf->row_pitch,
> > > +  x_offset_el, y_offset_el,
> > > +  _B, _offset_el,
> > > _offset_el);
> > > +   params.dst.addr.offset += offset_B;
> > > +
> > > +   const uint32_t width_px = minify(surf->surf->logical_
> level0_px.width,
> > > level);
> > > +   const uint32_t height_px = minify(surf->surf->logical_
> level0_px.height,
> > > level);
> > > +   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
> > > +   const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
> > > +
> > > +   struct isl_tile_info ccs_tile_info;
> > > +   isl_surf_get_tile_info(surf->aux_surf, _tile_info);
> > > +
> > > +   /* We're going to map it as a regular RGBA32_UINT surface.  We
> need to
> > > +* downscale a good deal.  We start by computing the area on the
> CCS to
> > > +* clear in units of Y-tiled cache lines.
> > > +*/
> > > +   uint32_t x_offset_y_cl, y_offset_y_cl, width_y_cl, height_y_cl;
> > > +   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) {
> > > +  /* From the Sky Lake PRM Vol. 12 in the section on planes:
> > > +   *
> > > +   *"The Color Control Surface (CCS) contains the compression
> > > status
> > > +   *of the cache-line pairs. The compression state of the
> 

Re: [Mesa-dev] [PATCH v2 14/24] intel/blorp: Add a CCS ambiguation pass

2018-01-30 Thread Nanley Chery
On Fri, Jan 19, 2018 at 05:25:41PM -0800, Jason Ekstrand wrote:
> On Fri, Jan 19, 2018 at 3:47 PM, Jason Ekstrand 
> wrote:
> 
> > This pass performs an "ambiguate" operation on a CCS-compressed surface
> > by manually writing zeros into the CCS.  On gen8+, ISL gives us a fairly
> > detailed notion of how the CCS is laid out so this is fairly simple to
> > do.  On gen7, the CCS tiling is quite crazy but that isn't an issue
> > because we can only do CCS on single-slice images so we can just blast
> > over the entire CCS buffer if we want to.
> > ---
> >  src/intel/blorp/blorp.h   |   5 ++
> >  src/intel/blorp/blorp_clear.c | 149 ++
> > 
> >  2 files changed, 154 insertions(+)
> >
> > diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> > index a1dd571..478a9af 100644
> > --- a/src/intel/blorp/blorp.h
> > +++ b/src/intel/blorp/blorp.h
> > @@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
> >enum blorp_fast_clear_op resolve_op);
> >
> >  void
> > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > +struct blorp_surf *surf,
> > +uint32_t level, uint32_t layer);
> > +
> > +void
> >  blorp_mcs_partial_resolve(struct blorp_batch *batch,
> >struct blorp_surf *surf,
> >enum isl_format format,
> > diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
> > index 8e7bc9f..fa2abd9 100644
> > --- a/src/intel/blorp/blorp_clear.c
> > +++ b/src/intel/blorp/blorp_clear.c
> > @@ -881,3 +881,152 @@ blorp_mcs_partial_resolve(struct blorp_batch *batch,
> >
> > batch->blorp->exec(batch, );
> >  }
> > +
> > +/** Clear a CCS to the "uncompressed" state
> > + *
> > + * This pass is the CCS equivalent of a "HiZ resolve".  It sets the CCS
> > values
> > + * for a given layer/level of a surface to 0x0 which is the "uncompressed"
> > + * state which tells the sampler to go look at the main surface.
> > + */
> > +void
> > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > +struct blorp_surf *surf,
> > +uint32_t level, uint32_t layer)
> > +{
> > +   struct blorp_params params;
> > +   blorp_params_init();
> > +
> > +   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
> > +
> > +   const struct isl_format_layout *aux_fmtl =
> > +  isl_format_get_layout(surf->aux_surf->format);
> > +   assert(aux_fmtl->txc == ISL_TXC_CCS);
> > +
> > +   params.dst = (struct brw_blorp_surface_info) {
> > +  .enabled = true,
> > +  .addr = surf->aux_addr,
> > +  .view = {
> > + .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
> > + .format = ISL_FORMAT_R32G32B32A32_UINT,
> > + .base_level = 0,
> > + .base_array_layer = 0,
> > + .levels = 1,
> > + .array_len = 1,
> > + .swizzle = ISL_SWIZZLE_IDENTITY,
> > +  },
> > +   };
> > +
> > +   uint32_t z = 0;
> > +   if (surf->surf->dim == ISL_SURF_DIM_3D) {
> > +  z = layer;
> > +  layer = 0;
> > +   }
> > +
> > +   uint32_t offset_B, x_offset_el, y_offset_el;
> > +   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
> > +_offset_el, _offset_el);
> > +   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling,
> > aux_fmtl->bpb,
> > +  surf->aux_surf->row_pitch,
> > +  x_offset_el, y_offset_el,
> > +  _B, _offset_el,
> > _offset_el);
> > +   params.dst.addr.offset += offset_B;
> > +
> > +   const uint32_t width_px = minify(surf->surf->logical_level0_px.width,
> > level);
> > +   const uint32_t height_px = minify(surf->surf->logical_level0_px.height,
> > level);
> > +   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
> > +   const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
> > +
> > +   struct isl_tile_info ccs_tile_info;
> > +   isl_surf_get_tile_info(surf->aux_surf, _tile_info);
> > +
> > +   /* We're going to map it as a regular RGBA32_UINT surface.  We need to
> > +* downscale a good deal.  We start by computing the area on the CCS to
> > +* clear in units of Y-tiled cache lines.
> > +*/
> > +   uint32_t x_offset_y_cl, y_offset_y_cl, width_y_cl, height_y_cl;
> > +   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) {
> > +  /* From the Sky Lake PRM Vol. 12 in the section on planes:
> > +   *
> > +   *"The Color Control Surface (CCS) contains the compression
> > status
> > +   *of the cache-line pairs. The compression state of the
> > cache-line
> > +   *pair is specified by 2 bits in the CCS.  Each CCS cache-line
> > +   *represents an area on the main surface of 16x16 sets of 128
> > byte
> > +   *Y-tiled cache-line-pairs. CCS is always Y tiled."
> > +   *
> > +   * Each 2-bit surface element in the CCS corresponds to 

Re: [Mesa-dev] [PATCH v2 14/24] intel/blorp: Add a CCS ambiguation pass

2018-01-29 Thread Jason Ekstrand
On Mon, Jan 29, 2018 at 7:21 AM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Fri, Jan 26, 2018 at 05:58:25PM +0200, Pohjolainen, Topi wrote:
> > On Wed, Jan 24, 2018 at 12:29:05PM -0800, Jason Ekstrand wrote:
> > > On Wed, Jan 24, 2018 at 6:15 AM, Pohjolainen, Topi <
> > > topi.pohjolai...@gmail.com> wrote:
> > >
> > > > On Fri, Jan 19, 2018 at 03:47:31PM -0800, Jason Ekstrand wrote:
> > > > > This pass performs an "ambiguate" operation on a CCS-compressed
> surface
> > > > > by manually writing zeros into the CCS.  On gen8+, ISL gives us a
> fairly
> > > > > detailed notion of how the CCS is laid out so this is fairly
> simple to
> > > > > do.  On gen7, the CCS tiling is quite crazy but that isn't an issue
> > > > > because we can only do CCS on single-slice images so we can just
> blast
> > > > > over the entire CCS buffer if we want to.
> > > > > ---
> > > > >  src/intel/blorp/blorp.h   |   5 ++
> > > > >  src/intel/blorp/blorp_clear.c | 149 ++
> > > > 
> > > > >  2 files changed, 154 insertions(+)
> > > > >
> > > > > diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> > > > > index a1dd571..478a9af 100644
> > > > > --- a/src/intel/blorp/blorp.h
> > > > > +++ b/src/intel/blorp/blorp.h
> > > > > @@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
> > > > >enum blorp_fast_clear_op resolve_op);
> > > > >
> > > > >  void
> > > > > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > > > > +struct blorp_surf *surf,
> > > > > +uint32_t level, uint32_t layer);
> > > > > +
> > > > > +void
> > > > >  blorp_mcs_partial_resolve(struct blorp_batch *batch,
> > > > >struct blorp_surf *surf,
> > > > >enum isl_format format,
> > > > > diff --git a/src/intel/blorp/blorp_clear.c
> > > > b/src/intel/blorp/blorp_clear.c
> > > > > index 8e7bc9f..fa2abd9 100644
> > > > > --- a/src/intel/blorp/blorp_clear.c
> > > > > +++ b/src/intel/blorp/blorp_clear.c
> > > > > @@ -881,3 +881,152 @@ blorp_mcs_partial_resolve(struct blorp_batch
> > > > *batch,
> > > > >
> > > > > batch->blorp->exec(batch, );
> > > > >  }
> > > > > +
> > > > > +/** Clear a CCS to the "uncompressed" state
> > > > > + *
> > > > > + * This pass is the CCS equivalent of a "HiZ resolve".  It sets
> the CCS
> > > > values
> > > > > + * for a given layer/level of a surface to 0x0 which is the
> > > > "uncompressed"
> > > > > + * state which tells the sampler to go look at the main surface.
> > > > > + */
> > > > > +void
> > > > > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > > > > +struct blorp_surf *surf,
> > > > > +uint32_t level, uint32_t layer)
> > > > > +{
> > > > > +   struct blorp_params params;
> > > > > +   blorp_params_init();
> > > > > +
> > > > > +   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
> > > > > +
> > > > > +   const struct isl_format_layout *aux_fmtl =
> > > > > +  isl_format_get_layout(surf->aux_surf->format);
> > > > > +   assert(aux_fmtl->txc == ISL_TXC_CCS);
> > > > > +
> > > > > +   params.dst = (struct brw_blorp_surface_info) {
> > > > > +  .enabled = true,
> > > > > +  .addr = surf->aux_addr,
> > > > > +  .view = {
> > > > > + .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
> > > > > + .format = ISL_FORMAT_R32G32B32A32_UINT,
> > > > > + .base_level = 0,
> > > > > + .base_array_layer = 0,
> > > > > + .levels = 1,
> > > > > + .array_len = 1,
> > > > > + .swizzle = ISL_SWIZZLE_IDENTITY,
> > > > > +  },
> > > > > +   };
> > > > > +
> > > > > +   uint32_t z = 0;
> > > > > +   if (surf->surf->dim == ISL_SURF_DIM_3D) {
> > > > > +  z = layer;
> > > > > +  layer = 0;
> > > > > +   }
> > > > > +
> > > > > +   uint32_t offset_B, x_offset_el, y_offset_el;
> > > > > +   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
> > > > > +_offset_el, _offset_el);
> > > > > +   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling,
> > > > aux_fmtl->bpb,
> > > > > +  surf->aux_surf->row_pitch,
> > > > > +  x_offset_el, y_offset_el,
> > > > > +  _B, _offset_el,
> > > > _offset_el);
> > > > > +   params.dst.addr.offset += offset_B;
> > > > > +
> > > > > +   const uint32_t width_px = minify(surf->surf->logical_
> level0_px.width,
> > > > level);
> > > > > +   const uint32_t height_px = minify(surf->surf->logical_
> level0_px.height,
> > > > level);
> > > > > +   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
> > > > > +   const uint32_t height_el = DIV_ROUND_UP(height_px,
> aux_fmtl->bh);
> >
> > I need to think about these numbers a little more. I think I got the
> other
> > sources of my confusion figured out further down. See further down.
>
> Right. I got a 

Re: [Mesa-dev] [PATCH v2 14/24] intel/blorp: Add a CCS ambiguation pass

2018-01-29 Thread Pohjolainen, Topi
On Fri, Jan 26, 2018 at 05:58:25PM +0200, Pohjolainen, Topi wrote:
> On Wed, Jan 24, 2018 at 12:29:05PM -0800, Jason Ekstrand wrote:
> > On Wed, Jan 24, 2018 at 6:15 AM, Pohjolainen, Topi <
> > topi.pohjolai...@gmail.com> wrote:
> > 
> > > On Fri, Jan 19, 2018 at 03:47:31PM -0800, Jason Ekstrand wrote:
> > > > This pass performs an "ambiguate" operation on a CCS-compressed surface
> > > > by manually writing zeros into the CCS.  On gen8+, ISL gives us a fairly
> > > > detailed notion of how the CCS is laid out so this is fairly simple to
> > > > do.  On gen7, the CCS tiling is quite crazy but that isn't an issue
> > > > because we can only do CCS on single-slice images so we can just blast
> > > > over the entire CCS buffer if we want to.
> > > > ---
> > > >  src/intel/blorp/blorp.h   |   5 ++
> > > >  src/intel/blorp/blorp_clear.c | 149 ++
> > > 
> > > >  2 files changed, 154 insertions(+)
> > > >
> > > > diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> > > > index a1dd571..478a9af 100644
> > > > --- a/src/intel/blorp/blorp.h
> > > > +++ b/src/intel/blorp/blorp.h
> > > > @@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
> > > >enum blorp_fast_clear_op resolve_op);
> > > >
> > > >  void
> > > > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > > > +struct blorp_surf *surf,
> > > > +uint32_t level, uint32_t layer);
> > > > +
> > > > +void
> > > >  blorp_mcs_partial_resolve(struct blorp_batch *batch,
> > > >struct blorp_surf *surf,
> > > >enum isl_format format,
> > > > diff --git a/src/intel/blorp/blorp_clear.c
> > > b/src/intel/blorp/blorp_clear.c
> > > > index 8e7bc9f..fa2abd9 100644
> > > > --- a/src/intel/blorp/blorp_clear.c
> > > > +++ b/src/intel/blorp/blorp_clear.c
> > > > @@ -881,3 +881,152 @@ blorp_mcs_partial_resolve(struct blorp_batch
> > > *batch,
> > > >
> > > > batch->blorp->exec(batch, );
> > > >  }
> > > > +
> > > > +/** Clear a CCS to the "uncompressed" state
> > > > + *
> > > > + * This pass is the CCS equivalent of a "HiZ resolve".  It sets the CCS
> > > values
> > > > + * for a given layer/level of a surface to 0x0 which is the
> > > "uncompressed"
> > > > + * state which tells the sampler to go look at the main surface.
> > > > + */
> > > > +void
> > > > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > > > +struct blorp_surf *surf,
> > > > +uint32_t level, uint32_t layer)
> > > > +{
> > > > +   struct blorp_params params;
> > > > +   blorp_params_init();
> > > > +
> > > > +   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
> > > > +
> > > > +   const struct isl_format_layout *aux_fmtl =
> > > > +  isl_format_get_layout(surf->aux_surf->format);
> > > > +   assert(aux_fmtl->txc == ISL_TXC_CCS);
> > > > +
> > > > +   params.dst = (struct brw_blorp_surface_info) {
> > > > +  .enabled = true,
> > > > +  .addr = surf->aux_addr,
> > > > +  .view = {
> > > > + .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
> > > > + .format = ISL_FORMAT_R32G32B32A32_UINT,
> > > > + .base_level = 0,
> > > > + .base_array_layer = 0,
> > > > + .levels = 1,
> > > > + .array_len = 1,
> > > > + .swizzle = ISL_SWIZZLE_IDENTITY,
> > > > +  },
> > > > +   };
> > > > +
> > > > +   uint32_t z = 0;
> > > > +   if (surf->surf->dim == ISL_SURF_DIM_3D) {
> > > > +  z = layer;
> > > > +  layer = 0;
> > > > +   }
> > > > +
> > > > +   uint32_t offset_B, x_offset_el, y_offset_el;
> > > > +   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
> > > > +_offset_el, _offset_el);
> > > > +   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling,
> > > aux_fmtl->bpb,
> > > > +  surf->aux_surf->row_pitch,
> > > > +  x_offset_el, y_offset_el,
> > > > +  _B, _offset_el,
> > > _offset_el);
> > > > +   params.dst.addr.offset += offset_B;
> > > > +
> > > > +   const uint32_t width_px = 
> > > > minify(surf->surf->logical_level0_px.width,
> > > level);
> > > > +   const uint32_t height_px = 
> > > > minify(surf->surf->logical_level0_px.height,
> > > level);
> > > > +   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
> > > > +   const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
> 
> I need to think about these numbers a little more. I think I got the other
> sources of my confusion figured out further down. See further down.

Right. I got a little confused when "width_px" and "height_px" were
calculated against the main surface dimensions. I think I had forgotten how
the aux surface actually got defined. It has the same dimensions as the main,
block/element structure mapping it against pixels. I somehow remembered the
aux dimensions were 

Re: [Mesa-dev] [PATCH v2 14/24] intel/blorp: Add a CCS ambiguation pass

2018-01-26 Thread Pohjolainen, Topi
On Wed, Jan 24, 2018 at 12:29:05PM -0800, Jason Ekstrand wrote:
> On Wed, Jan 24, 2018 at 6:15 AM, Pohjolainen, Topi <
> topi.pohjolai...@gmail.com> wrote:
> 
> > On Fri, Jan 19, 2018 at 03:47:31PM -0800, Jason Ekstrand wrote:
> > > This pass performs an "ambiguate" operation on a CCS-compressed surface
> > > by manually writing zeros into the CCS.  On gen8+, ISL gives us a fairly
> > > detailed notion of how the CCS is laid out so this is fairly simple to
> > > do.  On gen7, the CCS tiling is quite crazy but that isn't an issue
> > > because we can only do CCS on single-slice images so we can just blast
> > > over the entire CCS buffer if we want to.
> > > ---
> > >  src/intel/blorp/blorp.h   |   5 ++
> > >  src/intel/blorp/blorp_clear.c | 149 ++
> > 
> > >  2 files changed, 154 insertions(+)
> > >
> > > diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> > > index a1dd571..478a9af 100644
> > > --- a/src/intel/blorp/blorp.h
> > > +++ b/src/intel/blorp/blorp.h
> > > @@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
> > >enum blorp_fast_clear_op resolve_op);
> > >
> > >  void
> > > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > > +struct blorp_surf *surf,
> > > +uint32_t level, uint32_t layer);
> > > +
> > > +void
> > >  blorp_mcs_partial_resolve(struct blorp_batch *batch,
> > >struct blorp_surf *surf,
> > >enum isl_format format,
> > > diff --git a/src/intel/blorp/blorp_clear.c
> > b/src/intel/blorp/blorp_clear.c
> > > index 8e7bc9f..fa2abd9 100644
> > > --- a/src/intel/blorp/blorp_clear.c
> > > +++ b/src/intel/blorp/blorp_clear.c
> > > @@ -881,3 +881,152 @@ blorp_mcs_partial_resolve(struct blorp_batch
> > *batch,
> > >
> > > batch->blorp->exec(batch, );
> > >  }
> > > +
> > > +/** Clear a CCS to the "uncompressed" state
> > > + *
> > > + * This pass is the CCS equivalent of a "HiZ resolve".  It sets the CCS
> > values
> > > + * for a given layer/level of a surface to 0x0 which is the
> > "uncompressed"
> > > + * state which tells the sampler to go look at the main surface.
> > > + */
> > > +void
> > > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > > +struct blorp_surf *surf,
> > > +uint32_t level, uint32_t layer)
> > > +{
> > > +   struct blorp_params params;
> > > +   blorp_params_init();
> > > +
> > > +   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
> > > +
> > > +   const struct isl_format_layout *aux_fmtl =
> > > +  isl_format_get_layout(surf->aux_surf->format);
> > > +   assert(aux_fmtl->txc == ISL_TXC_CCS);
> > > +
> > > +   params.dst = (struct brw_blorp_surface_info) {
> > > +  .enabled = true,
> > > +  .addr = surf->aux_addr,
> > > +  .view = {
> > > + .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
> > > + .format = ISL_FORMAT_R32G32B32A32_UINT,
> > > + .base_level = 0,
> > > + .base_array_layer = 0,
> > > + .levels = 1,
> > > + .array_len = 1,
> > > + .swizzle = ISL_SWIZZLE_IDENTITY,
> > > +  },
> > > +   };
> > > +
> > > +   uint32_t z = 0;
> > > +   if (surf->surf->dim == ISL_SURF_DIM_3D) {
> > > +  z = layer;
> > > +  layer = 0;
> > > +   }
> > > +
> > > +   uint32_t offset_B, x_offset_el, y_offset_el;
> > > +   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
> > > +_offset_el, _offset_el);
> > > +   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling,
> > aux_fmtl->bpb,
> > > +  surf->aux_surf->row_pitch,
> > > +  x_offset_el, y_offset_el,
> > > +  _B, _offset_el,
> > _offset_el);
> > > +   params.dst.addr.offset += offset_B;
> > > +
> > > +   const uint32_t width_px = minify(surf->surf->logical_level0_px.width,
> > level);
> > > +   const uint32_t height_px = 
> > > minify(surf->surf->logical_level0_px.height,
> > level);
> > > +   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
> > > +   const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);

I need to think about these numbers a little more. I think I got the other
sources of my confusion figured out further down. See further down.

> > > +
> > > +   struct isl_tile_info ccs_tile_info;
> > > +   isl_surf_get_tile_info(surf->aux_surf, _tile_info);
> > > +
> > > +   /* We're going to map it as a regular RGBA32_UINT surface.  We need
> > to
> > > +* downscale a good deal.  We start by computing the area on the CCS
> > to
> > > +* clear in units of Y-tiled cache lines.
> > > +*/
> > > +   uint32_t x_offset_y_cl, y_offset_y_cl, width_y_cl, height_y_cl;
> > > +   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) {
> > > +  /* From the Sky Lake PRM Vol. 12 in the section on planes:
> > > +   *
> > > +   *"The Color 

Re: [Mesa-dev] [PATCH v2 14/24] intel/blorp: Add a CCS ambiguation pass

2018-01-24 Thread Jason Ekstrand
On Wed, Jan 24, 2018 at 6:15 AM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Fri, Jan 19, 2018 at 03:47:31PM -0800, Jason Ekstrand wrote:
> > This pass performs an "ambiguate" operation on a CCS-compressed surface
> > by manually writing zeros into the CCS.  On gen8+, ISL gives us a fairly
> > detailed notion of how the CCS is laid out so this is fairly simple to
> > do.  On gen7, the CCS tiling is quite crazy but that isn't an issue
> > because we can only do CCS on single-slice images so we can just blast
> > over the entire CCS buffer if we want to.
> > ---
> >  src/intel/blorp/blorp.h   |   5 ++
> >  src/intel/blorp/blorp_clear.c | 149 ++
> 
> >  2 files changed, 154 insertions(+)
> >
> > diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> > index a1dd571..478a9af 100644
> > --- a/src/intel/blorp/blorp.h
> > +++ b/src/intel/blorp/blorp.h
> > @@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
> >enum blorp_fast_clear_op resolve_op);
> >
> >  void
> > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > +struct blorp_surf *surf,
> > +uint32_t level, uint32_t layer);
> > +
> > +void
> >  blorp_mcs_partial_resolve(struct blorp_batch *batch,
> >struct blorp_surf *surf,
> >enum isl_format format,
> > diff --git a/src/intel/blorp/blorp_clear.c
> b/src/intel/blorp/blorp_clear.c
> > index 8e7bc9f..fa2abd9 100644
> > --- a/src/intel/blorp/blorp_clear.c
> > +++ b/src/intel/blorp/blorp_clear.c
> > @@ -881,3 +881,152 @@ blorp_mcs_partial_resolve(struct blorp_batch
> *batch,
> >
> > batch->blorp->exec(batch, );
> >  }
> > +
> > +/** Clear a CCS to the "uncompressed" state
> > + *
> > + * This pass is the CCS equivalent of a "HiZ resolve".  It sets the CCS
> values
> > + * for a given layer/level of a surface to 0x0 which is the
> "uncompressed"
> > + * state which tells the sampler to go look at the main surface.
> > + */
> > +void
> > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > +struct blorp_surf *surf,
> > +uint32_t level, uint32_t layer)
> > +{
> > +   struct blorp_params params;
> > +   blorp_params_init();
> > +
> > +   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
> > +
> > +   const struct isl_format_layout *aux_fmtl =
> > +  isl_format_get_layout(surf->aux_surf->format);
> > +   assert(aux_fmtl->txc == ISL_TXC_CCS);
> > +
> > +   params.dst = (struct brw_blorp_surface_info) {
> > +  .enabled = true,
> > +  .addr = surf->aux_addr,
> > +  .view = {
> > + .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
> > + .format = ISL_FORMAT_R32G32B32A32_UINT,
> > + .base_level = 0,
> > + .base_array_layer = 0,
> > + .levels = 1,
> > + .array_len = 1,
> > + .swizzle = ISL_SWIZZLE_IDENTITY,
> > +  },
> > +   };
> > +
> > +   uint32_t z = 0;
> > +   if (surf->surf->dim == ISL_SURF_DIM_3D) {
> > +  z = layer;
> > +  layer = 0;
> > +   }
> > +
> > +   uint32_t offset_B, x_offset_el, y_offset_el;
> > +   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
> > +_offset_el, _offset_el);
> > +   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling,
> aux_fmtl->bpb,
> > +  surf->aux_surf->row_pitch,
> > +  x_offset_el, y_offset_el,
> > +  _B, _offset_el,
> _offset_el);
> > +   params.dst.addr.offset += offset_B;
> > +
> > +   const uint32_t width_px = minify(surf->surf->logical_level0_px.width,
> level);
> > +   const uint32_t height_px = minify(surf->surf->logical_level0_px.height,
> level);
> > +   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
> > +   const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
> > +
> > +   struct isl_tile_info ccs_tile_info;
> > +   isl_surf_get_tile_info(surf->aux_surf, _tile_info);
> > +
> > +   /* We're going to map it as a regular RGBA32_UINT surface.  We need
> to
> > +* downscale a good deal.  We start by computing the area on the CCS
> to
> > +* clear in units of Y-tiled cache lines.
> > +*/
> > +   uint32_t x_offset_y_cl, y_offset_y_cl, width_y_cl, height_y_cl;
> > +   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) {
> > +  /* From the Sky Lake PRM Vol. 12 in the section on planes:
> > +   *
> > +   *"The Color Control Surface (CCS) contains the compression
> status
> > +   *of the cache-line pairs. The compression state of the
> cache-line
> > +   *pair is specified by 2 bits in the CCS.  Each CCS cache-line
> > +   *represents an area on the main surface of 16x16 sets of 128
> byte
> > +   *Y-tiled cache-line-pairs. CCS is always Y tiled."
> > +   *
> > +   * Each 2-bit surface element in the CCS corresponds to a 

Re: [Mesa-dev] [PATCH v2 14/24] intel/blorp: Add a CCS ambiguation pass

2018-01-24 Thread Pohjolainen, Topi
On Wed, Jan 24, 2018 at 04:15:00PM +0200, Pohjolainen, Topi wrote:
> On Fri, Jan 19, 2018 at 03:47:31PM -0800, Jason Ekstrand wrote:
> > This pass performs an "ambiguate" operation on a CCS-compressed surface
> > by manually writing zeros into the CCS.  On gen8+, ISL gives us a fairly
> > detailed notion of how the CCS is laid out so this is fairly simple to
> > do.  On gen7, the CCS tiling is quite crazy but that isn't an issue
> > because we can only do CCS on single-slice images so we can just blast
> > over the entire CCS buffer if we want to.
> > ---
> >  src/intel/blorp/blorp.h   |   5 ++
> >  src/intel/blorp/blorp_clear.c | 149 
> > ++
> >  2 files changed, 154 insertions(+)
> > 
> > diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> > index a1dd571..478a9af 100644
> > --- a/src/intel/blorp/blorp.h
> > +++ b/src/intel/blorp/blorp.h
> > @@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
> >enum blorp_fast_clear_op resolve_op);
> >  
> >  void
> > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > +struct blorp_surf *surf,
> > +uint32_t level, uint32_t layer);
> > +
> > +void
> >  blorp_mcs_partial_resolve(struct blorp_batch *batch,
> >struct blorp_surf *surf,
> >enum isl_format format,
> > diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
> > index 8e7bc9f..fa2abd9 100644
> > --- a/src/intel/blorp/blorp_clear.c
> > +++ b/src/intel/blorp/blorp_clear.c
> > @@ -881,3 +881,152 @@ blorp_mcs_partial_resolve(struct blorp_batch *batch,
> >  
> > batch->blorp->exec(batch, &params);
> >  }
> > +
> > +/** Clear a CCS to the "uncompressed" state
> > + *
> > + * This pass is the CCS equivalent of a "HiZ resolve".  It sets the CCS 
> > values
> > + * for a given layer/level of a surface to 0x0 which is the "uncompressed"
> > + * state which tells the sampler to go look at the main surface.
> > + */
> > +void
> > +blorp_ccs_ambiguate(struct blorp_batch *batch,
> > +struct blorp_surf *surf,
> > +uint32_t level, uint32_t layer)
> > +{
> > +   struct blorp_params params;
> > +   blorp_params_init(&params);
> > +
> > +   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
> > +
> > +   const struct isl_format_layout *aux_fmtl =
> > +  isl_format_get_layout(surf->aux_surf->format);
> > +   assert(aux_fmtl->txc == ISL_TXC_CCS);
> > +
> > +   params.dst = (struct brw_blorp_surface_info) {
> > +  .enabled = true,
> > +  .addr = surf->aux_addr,
> > +  .view = {
> > + .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
> > + .format = ISL_FORMAT_R32G32B32A32_UINT,
> > + .base_level = 0,
> > + .base_array_layer = 0,
> > + .levels = 1,
> > + .array_len = 1,
> > + .swizzle = ISL_SWIZZLE_IDENTITY,
> > +  },
> > +   };
> > +
> > +   uint32_t z = 0;
> > +   if (surf->surf->dim == ISL_SURF_DIM_3D) {
> > +  z = layer;
> > +  layer = 0;
> > +   }
> > +
> > +   uint32_t offset_B, x_offset_el, y_offset_el;
> > +   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
> > +&x_offset_el, &y_offset_el);
> > +   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling, 
> > aux_fmtl->bpb,
> > +  surf->aux_surf->row_pitch,
> > +  x_offset_el, y_offset_el,
> > +  &offset_B, &x_offset_el, 
> > &y_offset_el);
> > +   params.dst.addr.offset += offset_B;
> > +
> > +   const uint32_t width_px = minify(surf->surf->logical_level0_px.width, 
> > level);
> > +   const uint32_t height_px = minify(surf->surf->logical_level0_px.height, 
> > level);
> > +   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
> > +   const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
> > +
> > +   struct isl_tile_info ccs_tile_info;
> > +   isl_surf_get_tile_info(surf->aux_surf, &ccs_tile_info);
> > +
> > +   /* We're going to map it as a regular RGBA32_UINT surface.  We need to
> > +* downscale a good deal.  We start by computing the area on the CCS to
> > +* clear in units of Y-tiled cache lines.
> > +*/
> > +   uint32_t x_offset_y_cl, y_offset_y_cl, width_y_cl, height_y_cl;
> > +   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) {
> > +  /* From the Sky Lake PRM Vol. 12 in the section on planes:
> > +   *
> > +   *"The Color Control Surface (CCS) contains the compression 
> > status
> > +   *of the cache-line pairs. The compression state of the 
> > cache-line
> > +   *pair is specified by 2 bits in the CCS.  Each CCS cache-line
> > +   *represents an area on the main surface of 16x16 sets of 128 
> > byte
> > +   *Y-tiled cache-line-pairs. CCS is always Y tiled."
> > +   *
> > +   * Each 2-bit surface element in the CCS corresponds to a 

Re: [Mesa-dev] [PATCH v2 14/24] intel/blorp: Add a CCS ambiguation pass

2018-01-24 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:31PM -0800, Jason Ekstrand wrote:
> This pass performs an "ambiguate" operation on a CCS-compressed surface
> by manually writing zeros into the CCS.  On gen8+, ISL gives us a fairly
> detailed notion of how the CCS is laid out so this is fairly simple to
> do.  On gen7, the CCS tiling is quite crazy but that isn't an issue
> because we can only do CCS on single-slice images so we can just blast
> over the entire CCS buffer if we want to.
> ---
>  src/intel/blorp/blorp.h   |   5 ++
>  src/intel/blorp/blorp_clear.c | 149 
> ++
>  2 files changed, 154 insertions(+)
> 
> diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> index a1dd571..478a9af 100644
> --- a/src/intel/blorp/blorp.h
> +++ b/src/intel/blorp/blorp.h
> @@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
>enum blorp_fast_clear_op resolve_op);
>  
>  void
> +blorp_ccs_ambiguate(struct blorp_batch *batch,
> +struct blorp_surf *surf,
> +uint32_t level, uint32_t layer);
> +
> +void
>  blorp_mcs_partial_resolve(struct blorp_batch *batch,
>struct blorp_surf *surf,
>enum isl_format format,
> diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
> index 8e7bc9f..fa2abd9 100644
> --- a/src/intel/blorp/blorp_clear.c
> +++ b/src/intel/blorp/blorp_clear.c
> @@ -881,3 +881,152 @@ blorp_mcs_partial_resolve(struct blorp_batch *batch,
>  
> batch->blorp->exec(batch, &params);
>  }
> +
> +/** Clear a CCS to the "uncompressed" state
> + *
> + * This pass is the CCS equivalent of a "HiZ resolve".  It sets the CCS 
> values
> + * for a given layer/level of a surface to 0x0 which is the "uncompressed"
> + * state which tells the sampler to go look at the main surface.
> + */
> +void
> +blorp_ccs_ambiguate(struct blorp_batch *batch,
> +struct blorp_surf *surf,
> +uint32_t level, uint32_t layer)
> +{
> +   struct blorp_params params;
> +   blorp_params_init(&params);
> +
> +   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
> +
> +   const struct isl_format_layout *aux_fmtl =
> +  isl_format_get_layout(surf->aux_surf->format);
> +   assert(aux_fmtl->txc == ISL_TXC_CCS);
> +
> +   params.dst = (struct brw_blorp_surface_info) {
> +  .enabled = true,
> +  .addr = surf->aux_addr,
> +  .view = {
> + .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
> + .format = ISL_FORMAT_R32G32B32A32_UINT,
> + .base_level = 0,
> + .base_array_layer = 0,
> + .levels = 1,
> + .array_len = 1,
> + .swizzle = ISL_SWIZZLE_IDENTITY,
> +  },
> +   };
> +
> +   uint32_t z = 0;
> +   if (surf->surf->dim == ISL_SURF_DIM_3D) {
> +  z = layer;
> +  layer = 0;
> +   }
> +
> +   uint32_t offset_B, x_offset_el, y_offset_el;
> +   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
> +&x_offset_el, &y_offset_el);
> +   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling, aux_fmtl->bpb,
> +  surf->aux_surf->row_pitch,
> +  x_offset_el, y_offset_el,
> +  &offset_B, &x_offset_el, &y_offset_el);
> +   params.dst.addr.offset += offset_B;
> +
> +   const uint32_t width_px = minify(surf->surf->logical_level0_px.width, 
> level);
> +   const uint32_t height_px = minify(surf->surf->logical_level0_px.height, 
> level);
> +   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
> +   const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
> +
> +   struct isl_tile_info ccs_tile_info;
> +   isl_surf_get_tile_info(surf->aux_surf, &ccs_tile_info);
> +
> +   /* We're going to map it as a regular RGBA32_UINT surface.  We need to
> +* downscale a good deal.  We start by computing the area on the CCS to
> +* clear in units of Y-tiled cache lines.
> +*/
> +   uint32_t x_offset_y_cl, y_offset_y_cl, width_y_cl, height_y_cl;
> +   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) {
> +  /* From the Sky Lake PRM Vol. 12 in the section on planes:
> +   *
> +   *"The Color Control Surface (CCS) contains the compression status
> +   *of the cache-line pairs. The compression state of the cache-line
> +   *pair is specified by 2 bits in the CCS.  Each CCS cache-line
> +   *represents an area on the main surface of 16x16 sets of 128 byte
> +   *Y-tiled cache-line-pairs. CCS is always Y tiled."
> +   *
> +   * Each 2-bit surface element in the CCS corresponds to a single
> +   * cache-line pair in the main surface.  This means that 16x16 el block
> +   * in the CCS maps to a Y-tiled cache line.  Fortunately, CCS layouts
> +   * are calculated with a very large alignment so we can round up to a
> +   * whole cache line without worrying about overdraw.
> +   

Re: [Mesa-dev] [PATCH v2 14/24] intel/blorp: Add a CCS ambiguation pass

2018-01-19 Thread Jason Ekstrand
On Fri, Jan 19, 2018 at 3:47 PM, Jason Ekstrand 
wrote:

> This pass performs an "ambiguate" operation on a CCS-compressed surface
> by manually writing zeros into the CCS.  On gen8+, ISL gives us a fairly
> detailed notion of how the CCS is laid out so this is fairly simple to
> do.  On gen7, the CCS tiling is quite crazy but that isn't an issue
> because we can only do CCS on single-slice images so we can just blast
> over the entire CCS buffer if we want to.
> ---
>  src/intel/blorp/blorp.h   |   5 ++
>  src/intel/blorp/blorp_clear.c | 149 ++
> 
>  2 files changed, 154 insertions(+)
>
> diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> index a1dd571..478a9af 100644
> --- a/src/intel/blorp/blorp.h
> +++ b/src/intel/blorp/blorp.h
> @@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
>enum blorp_fast_clear_op resolve_op);
>
>  void
> +blorp_ccs_ambiguate(struct blorp_batch *batch,
> +struct blorp_surf *surf,
> +uint32_t level, uint32_t layer);
> +
> +void
>  blorp_mcs_partial_resolve(struct blorp_batch *batch,
>struct blorp_surf *surf,
>enum isl_format format,
> diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
> index 8e7bc9f..fa2abd9 100644
> --- a/src/intel/blorp/blorp_clear.c
> +++ b/src/intel/blorp/blorp_clear.c
> @@ -881,3 +881,152 @@ blorp_mcs_partial_resolve(struct blorp_batch *batch,
>
> batch->blorp->exec(batch, &params);
>  }
> +
> +/** Clear a CCS to the "uncompressed" state
> + *
> + * This pass is the CCS equivalent of a "HiZ resolve".  It sets the CCS
> values
> + * for a given layer/level of a surface to 0x0 which is the "uncompressed"
> + * state which tells the sampler to go look at the main surface.
> + */
> +void
> +blorp_ccs_ambiguate(struct blorp_batch *batch,
> +struct blorp_surf *surf,
> +uint32_t level, uint32_t layer)
> +{
> +   struct blorp_params params;
> +   blorp_params_init(&params);
> +
> +   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
> +
> +   const struct isl_format_layout *aux_fmtl =
> +  isl_format_get_layout(surf->aux_surf->format);
> +   assert(aux_fmtl->txc == ISL_TXC_CCS);
> +
> +   params.dst = (struct brw_blorp_surface_info) {
> +  .enabled = true,
> +  .addr = surf->aux_addr,
> +  .view = {
> + .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
> + .format = ISL_FORMAT_R32G32B32A32_UINT,
> + .base_level = 0,
> + .base_array_layer = 0,
> + .levels = 1,
> + .array_len = 1,
> + .swizzle = ISL_SWIZZLE_IDENTITY,
> +  },
> +   };
> +
> +   uint32_t z = 0;
> +   if (surf->surf->dim == ISL_SURF_DIM_3D) {
> +  z = layer;
> +  layer = 0;
> +   }
> +
> +   uint32_t offset_B, x_offset_el, y_offset_el;
> +   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
> +&x_offset_el, &y_offset_el);
> +   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling,
> aux_fmtl->bpb,
> +  surf->aux_surf->row_pitch,
> +  x_offset_el, y_offset_el,
> +  &offset_B, &x_offset_el,
> &y_offset_el);
> +   params.dst.addr.offset += offset_B;
> +
> +   const uint32_t width_px = minify(surf->surf->logical_level0_px.width,
> level);
> +   const uint32_t height_px = minify(surf->surf->logical_level0_px.height,
> level);
> +   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
> +   const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
> +
> +   struct isl_tile_info ccs_tile_info;
> +   isl_surf_get_tile_info(surf->aux_surf, &ccs_tile_info);
> +
> +   /* We're going to map it as a regular RGBA32_UINT surface.  We need to
> +* downscale a good deal.  We start by computing the area on the CCS to
> +* clear in units of Y-tiled cache lines.
> +*/
> +   uint32_t x_offset_y_cl, y_offset_y_cl, width_y_cl, height_y_cl;
> +   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) {
> +  /* From the Sky Lake PRM Vol. 12 in the section on planes:
> +   *
> +   *"The Color Control Surface (CCS) contains the compression
> status
> +   *of the cache-line pairs. The compression state of the
> cache-line
> +   *pair is specified by 2 bits in the CCS.  Each CCS cache-line
> +   *represents an area on the main surface of 16x16 sets of 128
> byte
> +   *Y-tiled cache-line-pairs. CCS is always Y tiled."
> +   *
> +   * Each 2-bit surface element in the CCS corresponds to a single
> +   * cache-line pair in the main surface.  This means that 16x16 el
> block
> +   * in the CCS maps to a Y-tiled cache line.  Fortunately, CCS
> layouts
> +   * are calculated with a very large alignment so we can round up to
> a
> +   * whole cache line without worrying 

[Mesa-dev] [PATCH v2 14/24] intel/blorp: Add a CCS ambiguation pass

2018-01-19 Thread Jason Ekstrand
This pass performs an "ambiguate" operation on a CCS-compressed surface
by manually writing zeros into the CCS.  On gen8+, ISL gives us a fairly
detailed notion of how the CCS is laid out so this is fairly simple to
do.  On gen7, the CCS tiling is quite crazy but that isn't an issue
because we can only do CCS on single-slice images so we can just blast
over the entire CCS buffer if we want to.
---
 src/intel/blorp/blorp.h   |   5 ++
 src/intel/blorp/blorp_clear.c | 149 ++
 2 files changed, 154 insertions(+)

diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index a1dd571..478a9af 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
   enum blorp_fast_clear_op resolve_op);
 
 void
+blorp_ccs_ambiguate(struct blorp_batch *batch,
+struct blorp_surf *surf,
+uint32_t level, uint32_t layer);
+
+void
 blorp_mcs_partial_resolve(struct blorp_batch *batch,
   struct blorp_surf *surf,
   enum isl_format format,
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index 8e7bc9f..fa2abd9 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -881,3 +881,152 @@ blorp_mcs_partial_resolve(struct blorp_batch *batch,
 
batch->blorp->exec(batch, &params);
 }
+
+/** Clear a CCS to the "uncompressed" state
+ *
+ * This pass is the CCS equivalent of a "HiZ resolve".  It sets the CCS values
+ * for a given layer/level of a surface to 0x0 which is the "uncompressed"
+ * state which tells the sampler to go look at the main surface.
+ */
+void
+blorp_ccs_ambiguate(struct blorp_batch *batch,
+struct blorp_surf *surf,
+uint32_t level, uint32_t layer)
+{
+   struct blorp_params params;
+   blorp_params_init(&params);
+
+   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
+
+   const struct isl_format_layout *aux_fmtl =
+  isl_format_get_layout(surf->aux_surf->format);
+   assert(aux_fmtl->txc == ISL_TXC_CCS);
+
+   params.dst = (struct brw_blorp_surface_info) {
+  .enabled = true,
+  .addr = surf->aux_addr,
+  .view = {
+ .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
+ .format = ISL_FORMAT_R32G32B32A32_UINT,
+ .base_level = 0,
+ .base_array_layer = 0,
+ .levels = 1,
+ .array_len = 1,
+ .swizzle = ISL_SWIZZLE_IDENTITY,
+  },
+   };
+
+   uint32_t z = 0;
+   if (surf->surf->dim == ISL_SURF_DIM_3D) {
+  z = layer;
+  layer = 0;
+   }
+
+   uint32_t offset_B, x_offset_el, y_offset_el;
+   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
+&x_offset_el, &y_offset_el);
+   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling, aux_fmtl->bpb,
+  surf->aux_surf->row_pitch,
+  x_offset_el, y_offset_el,
+  &offset_B, &x_offset_el, &y_offset_el);
+   params.dst.addr.offset += offset_B;
+
+   const uint32_t width_px = minify(surf->surf->logical_level0_px.width, 
level);
+   const uint32_t height_px = minify(surf->surf->logical_level0_px.height, 
level);
+   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
+   const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
+
+   struct isl_tile_info ccs_tile_info;
+   isl_surf_get_tile_info(surf->aux_surf, &ccs_tile_info);
+
+   /* We're going to map it as a regular RGBA32_UINT surface.  We need to
+* downscale a good deal.  We start by computing the area on the CCS to
+* clear in units of Y-tiled cache lines.
+*/
+   uint32_t x_offset_y_cl, y_offset_y_cl, width_y_cl, height_y_cl;
+   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) {
+  /* From the Sky Lake PRM Vol. 12 in the section on planes:
+   *
+   *"The Color Control Surface (CCS) contains the compression status
+   *of the cache-line pairs. The compression state of the cache-line
+   *pair is specified by 2 bits in the CCS.  Each CCS cache-line
+   *represents an area on the main surface of 16x16 sets of 128 byte
+   *Y-tiled cache-line-pairs. CCS is always Y tiled."
+   *
+   * Each 2-bit surface element in the CCS corresponds to a single
+   * cache-line pair in the main surface.  This means that 16x16 el block
+   * in the CCS maps to a Y-tiled cache line.  Fortunately, CCS layouts
+   * are calculated with a very large alignment so we can round up to a
+   * whole cache line without worrying about overdraw.
+   */
+
+  /* On Broadwell and above, a CCS tile is the same as a Y tile when
+   * viewed at the cache-line granularity.  Fortunately, the horizontal
+   * and vertical alignment requirements of the CCS are such that we can
+   * align to an entire cache line without worrying about