On 04/01/2016 07:34 AM, Ilia Mirkin wrote:

On Mar 31, 2016 12:09 PM, "Samuel Pitoiset" <samuel.pitoi...@gmail.com
<mailto:samuel.pitoi...@gmail.com>> wrote:
 >
 > The grid size is stored as three 32-bit integers in the indirect
 > buffer, but the launch descriptor packs both griddim_y and griddim_z
 > into a single 32-bit integer, like this: (z << 16) | y. To make it work,
 > the 16 high bits of griddim_y are overwritten by griddim_z.
 >
 > Changes from v2:
 >  - upload the whole descriptor and overwrite a few bits
 >
 > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com
<mailto:samuel.pitoi...@gmail.com>>
 > ---
 >  src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 99
+++++++++++++++++--------
 >  1 file changed, 67 insertions(+), 32 deletions(-)
 >
 > diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
 > index 4d4808c..1a2afee 100644
 > --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
 > +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
 > @@ -435,9 +435,7 @@ nve4_state_validate_cp(struct nvc0_context *nvc0,
uint32_t mask)
 >  static void
 >  nve4_compute_upload_input(struct nvc0_context *nvc0,
 >                            struct nve4_cp_launch_desc *desc,
 > -                          const void *input,
 > -                          const uint *block_layout,
 > -                          const uint *grid_layout)
 > +                          const struct pipe_grid_info *info)
 >  {
 >     struct nvc0_screen *screen = nvc0->screen;
 >     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
 > @@ -455,7 +453,7 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
 >        PUSH_DATA (push, 0x1);
 >        BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
 >        PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
 > -      PUSH_DATAp(push, input, cp->parm_size / 4);
 > +      PUSH_DATAp(push, info->input, cp->parm_size / 4);
 >
 >        /* Bind user parameters coming from clover. */
 >        /* TODO: This should be harmonized with uniform_bo. */
 > @@ -470,8 +468,17 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
 >     PUSH_DATA (push, 0x1);
 >     BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
 >     PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
 > -   PUSH_DATAp(push, block_layout, 3);
 > -   PUSH_DATAp(push, grid_layout, 3);
 > +   PUSH_DATAp(push, info->block, 3);
 > +   if (unlikely(info->indirect)) {
 > +      struct nv04_resource *res = nv04_resource(info->indirect);
 > +      uint32_t offset = res->offset + info->indirect_offset;
 > +
 > +      PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);

This can get you into trouble as it might flush things. You need to
stick a nouveau_pushbuf_space call before the begin.

Good catch.


 > +      nouveau_pushbuf_data(push, res->bo, offset,
 > +                           NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
 > +   } else {
 > +      PUSH_DATAp(push, info->grid, 3);
 > +   }
 >     PUSH_DATA (push, 0);
 >
 >     BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
 > @@ -491,23 +498,21 @@ nve4_compute_derive_cache_split(struct
nvc0_context *nvc0, uint32_t shared_size)
 >  static void
 >  nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
 >                                 struct nve4_cp_launch_desc *desc,
 > -                               uint32_t label,
 > -                               const uint *block_layout,
 > -                               const uint *grid_layout)
 > +                               const struct pipe_grid_info *info)
 >  {
 >     const struct nvc0_screen *screen = nvc0->screen;
 >     const struct nvc0_program *cp = nvc0->compprog;
 >
 >     nve4_cp_launch_desc_init_default(desc);
 >
 > -   desc->entry = nvc0_program_symbol_offset(cp, label);
 > +   desc->entry = nvc0_program_symbol_offset(cp, info->pc);
 >
 > -   desc->griddim_x = grid_layout[0];
 > -   desc->griddim_y = grid_layout[1];
 > -   desc->griddim_z = grid_layout[2];
 > -   desc->blockdim_x = block_layout[0];
 > -   desc->blockdim_y = block_layout[1];
 > -   desc->blockdim_z = block_layout[2];
 > +   desc->griddim_x = info->grid[0];
 > +   desc->griddim_y = info->grid[1];
 > +   desc->griddim_z = info->grid[2];
 > +   desc->blockdim_x = info->block[0];
 > +   desc->blockdim_y = info->block[1];
 > +   desc->blockdim_z = info->block[2];
 >
 >     desc->shared_size = align(cp->cp.smem_size, 0x100);
 >     desc->local_size_p = align(cp->cp.lmem_size, 0x10);
 > @@ -566,30 +571,60 @@ nve4_launch_grid(struct pipe_context *pipe,
const struct pipe_grid_info *info)
 >     if (ret)
 >        goto out;
 >
 > -   nve4_compute_setup_launch_desc(nvc0, desc, info->pc,
 > -                                  info->block, info->grid);
 > +   nve4_compute_setup_launch_desc(nvc0, desc, info);
 >
 > -   nve4_compute_upload_input(nvc0, desc, info->input, info->block,
info->grid);
 > +   nve4_compute_upload_input(nvc0, desc, info);
 >
 >  #ifdef DEBUG
 >     if (debug_get_num_option("NV50_PROG_DEBUG", 0))
 >        nve4_compute_dump_launch_desc(desc);
 >  #endif
 >
 > +   if (unlikely(info->indirect)) {
 > +      struct nv04_resource *res = nv04_resource(info->indirect);
 > +      uint32_t offset = res->offset + info->indirect_offset;
 > +
 > +      /* upload the descriptor */
 > +      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
 > +      PUSH_DATAh(push, desc_gpuaddr);
 > +      PUSH_DATA (push, desc_gpuaddr);
 > +      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
 > +      PUSH_DATA (push, 256);
 > +      PUSH_DATA (push, 1);
 > +      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (256 / 4));
 > +      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
 > +      PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
 > +
 > +      /* overwrite griddim_x and griddim_y as two 32-bit integers even
 > +       * if griddim_y must be a 16-bit integer */
 > +      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
 > +      PUSH_DATAh(push, desc_gpuaddr + 48);
 > +      PUSH_DATA (push, desc_gpuaddr + 48);
 > +      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
 > +      PUSH_DATA (push, 8);
 > +      PUSH_DATA (push, 1);
 > +      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (8 / 4));
 > +      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
 > +      nouveau_pushbuf_space(push, 16, 0, 1);
 > +      PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);

Right, so like this, but before the begin :)

Mmmh, are you sure it's required to put nouveau_pushbuf_space() before the begin? We don't do that for indirect compute on Fermi, or maybe it's also broken.


 > +      nouveau_pushbuf_data(push, res->bo, offset,
 > +                           NVC0_IB_ENTRY_1_NO_PREFETCH | 2 * 4);
 > +
 > +      /* overwrite the 16 high bits of griddim_y with griddim_z because
 > +       * we need (z << 16) | y */
 > +      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
 > +      PUSH_DATAh(push, desc_gpuaddr + 54);
 > +      PUSH_DATA (push, desc_gpuaddr + 54);
 > +      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
 > +      PUSH_DATA (push, 4);
 > +      PUSH_DATA (push, 1);
 > +      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (4 / 4));
 > +      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
 > +      nouveau_pushbuf_data(push, res->bo, offset + 8,
 > +                           NVC0_IB_ENTRY_1_NO_PREFETCH | 1 * 4);
 > +   }
 > +
 >     /* upload descriptor and flush */
 > -#if 0
 > -   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
 > -   PUSH_DATAh(push, desc_gpuaddr);
 > -   PUSH_DATA (push, desc_gpuaddr);
 > -   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
 > -   PUSH_DATA (push, 256);
 > -   PUSH_DATA (push, 1);
 > -   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (256 / 4));
 > -   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
 > -   PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
 > -   BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
 > -   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE);
 > -#endif
 >     BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1);
 >     PUSH_DATA (push, desc_gpuaddr >> 8);
 >     BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1);
 > --
 > 2.7.4
 >
 > _______________________________________________
 > mesa-dev mailing list
 > mesa-dev@lists.freedesktop.org <mailto:mesa-dev@lists.freedesktop.org>
 > https://lists.freedesktop.org/mailman/listinfo/mesa-dev


--
-Samuel
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to