Re: [Mesa-dev] [PATCH 14/20] radeonsi: implement TGSI compute dispatch

Marek Olšák Mon, 04 Apr 2016 10:42:02 -0700

This should use the radeon_set_sh_reg* helpers instead of emitting PKT3_SET_SH_REG by hand with radeon_emit, like the other patches do.

Marek


On Sat, Apr 2, 2016 at 3:10 PM, Bas Nieuwenhuizen
<[email protected]> wrote:
> Signed-off-by: Bas Nieuwenhuizen <[email protected]>
> ---
>  src/gallium/drivers/radeonsi/si_compute.c | 104 ++++++++++++++++++++++--------
>  1 file changed, 77 insertions(+), 27 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
> index 74db8d4..64ad2f3 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -346,13 +346,85 @@ static void si_upload_compute_input(struct si_context *sctx,
>         pipe_resource_reference((struct pipe_resource**)&input_buffer, NULL);
>  }
>
> +static void si_setup_tgsi_grid(struct si_context *sctx,
> +                                const struct pipe_grid_info *info)
> +{
> +       struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> +       unsigned grid_size_reg = R_00B900_COMPUTE_USER_DATA_0 +
> +                                 4 * SI_SGPR_GRID_SIZE;
> +
> +       if (info->indirect) {
> +               uint64_t base_va = r600_resource(info->indirect)->gpu_address;
> +               uint64_t va = base_va + info->indirect_offset;
> +               int i;
> +
> +               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> +                                (struct r600_resource *)info->indirect,
> +                                RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
> +
> +               for (i = 0; i < 3; ++i) {
> +                       radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
> +                       radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
> +                                       COPY_DATA_DST_SEL(COPY_DATA_REG));
> +                       radeon_emit(cs, (va +  4 * i));
> +                       radeon_emit(cs, (va + 4 * i) >> 32);
> +                       radeon_emit(cs, (grid_size_reg >> 2) + i);
> +                       radeon_emit(cs, 0);
> +               }
> +       } else {
> +
> +               radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 3, 0));
> +               radeon_emit(cs, (grid_size_reg - SI_SH_REG_OFFSET) >> 2);
> +               radeon_emit(cs, info->grid[0]);
> +               radeon_emit(cs, info->grid[1]);
> +               radeon_emit(cs, info->grid[2]);
> +       }
> +}
> +
> +static void si_emit_dispatch_packets(struct si_context *sctx,
> +                                     const struct pipe_grid_info *info)
> +{
> +       struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> +
> +       radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 3, 0));
> +       radeon_emit(cs, (R_00B81C_COMPUTE_NUM_THREAD_X - SI_SH_REG_OFFSET) >> 2);
> +       radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]));
> +       radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]));
> +       radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]));
> +
> +       if (info->indirect) {
> +               uint64_t base_va = r600_resource(info->indirect)->gpu_address;
> +
> +               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> +                                (struct r600_resource *)info->indirect,
> +                                RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
> +
> +               radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) |
> +                               PKT3_SHADER_TYPE_S(1));
> +               radeon_emit(cs, 1);
> +               radeon_emit(cs, base_va);
> +               radeon_emit(cs, base_va >> 32);
> +
> +               radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) |
> +                               PKT3_SHADER_TYPE_S(1));
> +               radeon_emit(cs, info->indirect_offset);
> +               radeon_emit(cs, 1);
> +       } else {
> +               radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
> +                               PKT3_SHADER_TYPE_S(1));
> +               radeon_emit(cs, info->grid[0]);
> +               radeon_emit(cs, info->grid[1]);
> +               radeon_emit(cs, info->grid[2]);
> +               radeon_emit(cs, 1);
> +       }
> +}
> +
>  static void si_launch_grid(
>                 struct pipe_context *ctx, const struct pipe_grid_info *info)
>  {
>         struct si_context *sctx = (struct si_context*)ctx;
>         struct si_compute *program = sctx->cs_shader_state.program;
> -       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
> -       unsigned i;
> +       int i;
>
>         si_need_cs_space(sctx);
>
> @@ -367,21 +439,12 @@ static void si_launch_grid(
>                          SI_CONTEXT_FLAG_COMPUTE;
>         si_emit_cache_flush(sctx, NULL);
>
> -       pm4->compute_pkt = true;
> -
>         if (!si_switch_compute_shader(sctx, program, &program->shader, info->pc))
>                 return;
>
>         if (program->input_size)
>                 si_upload_compute_input(sctx, info);
>
> -       si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X,
> -                               S_00B81C_NUM_THREAD_FULL(info->block[0]));
> -       si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y,
> -                               S_00B820_NUM_THREAD_FULL(info->block[1]));
> -       si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z,
> -                               S_00B824_NUM_THREAD_FULL(info->block[2]));
> -
>         /* Global buffers */
>         for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) {
>                 struct r600_resource *buffer =
> @@ -394,23 +457,10 @@ static void si_launch_grid(
>                                           RADEON_PRIO_COMPUTE_GLOBAL);
>         }
>
> -       si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT);
> -       si_pm4_cmd_add(pm4, info->grid[0]); /* Thread groups DIM_X */
> -       si_pm4_cmd_add(pm4, info->grid[1]); /* Thread groups DIM_Y */
> -       si_pm4_cmd_add(pm4, info->grid[2]); /* Thread gropus DIM_Z */
> -       si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */
> -        si_pm4_cmd_end(pm4, false);
> -
> -       si_pm4_emit(sctx, pm4);
> -
> -#if 0
> -       fprintf(stderr, "cdw: %i\n", sctx->cs->cdw);
> -       for (i = 0; i < sctx->cs->cdw; i++) {
> -               fprintf(stderr, "%4i : 0x%08X\n", i, sctx->cs->buf[i]);
> -       }
> -#endif
> +       if (program->ir_type == PIPE_SHADER_IR_TGSI)
> +               si_setup_tgsi_grid(sctx, info);
>
> -       si_pm4_free_state(sctx, pm4, ~0);
> +       si_emit_dispatch_packets(sctx, info);
>
>         sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
>                          SI_CONTEXT_INV_VMEM_L1 |
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 14/20] radeonsi: implement TGSI compute dispatch

Reply via email to