Nice, this seems to halve the CPU cost of submitting 100 command buffers
in a single submit with the simultaneous-use flag set.

Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>

for the series.
On Thu, Nov 15, 2018 at 11:27 AM Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
>
> The chained submission is the fastest path and it should now
> be used more often than before. This removes some EOP events.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> ---
>  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 103 ++++++++++--------
>  1 file changed, 55 insertions(+), 48 deletions(-)
>
> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
> b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> index abc4f3903d..f2d07a54db 100644
> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> @@ -865,66 +865,73 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct 
> radeon_winsys_ctx *_ctx,
>         struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
>         struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
>         amdgpu_bo_list_handle bo_list;
> -       struct amdgpu_cs_request request;
> -       bool emit_signal_sem = sem_info->cs_emit_signal;
> +       struct amdgpu_cs_request request = {};
> +       struct amdgpu_cs_ib_info *ibs;
> +       struct radv_amdgpu_cs *cs0;
> +       unsigned number_of_ibs;
> +
>         assert(cs_count);
> +       cs0 = radv_amdgpu_cs(cs_array[0]);
>
> -       for (unsigned i = 0; i < cs_count;) {
> -               struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
> -               struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
> -               struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs 
> : initial_preamble_cs;
> -               unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - 
> !!preamble_cs,
> -                                   cs_count - i);
> +       /* Compute the number of IBs for this submit. */
> +       number_of_ibs = cs_count + !!initial_preamble_cs;
>
> -               memset(&request, 0, sizeof(request));
> +       /* Create a buffer object list. */
> +       r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 
> 0,
> +                                      initial_preamble_cs, radv_bo_list,
> +                                      &bo_list);
> +       if (r) {
> +               fprintf(stderr, "amdgpu: buffer list creation failed "
> +                               "for the fallback submission (%d)\n", r);
> +               return r;
> +       }
>
> -               r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, 
> NULL, 0,
> -                                              preamble_cs, radv_bo_list, 
> &bo_list);
> -               if (r) {
> -                       fprintf(stderr, "amdgpu: buffer list creation failed "
> -                                       "for the fallback submission (%d)\n", 
> r);
> -                       return r;
> -               }
> +       ibs = malloc(number_of_ibs * sizeof(*ibs));
> +       if (!ibs) {
> +               if (bo_list)
> +                       amdgpu_bo_list_destroy(bo_list);
> +               return -ENOMEM;
> +       }
>
> -               request.ip_type = cs0->hw_ip;
> -               request.ring = queue_idx;
> -               request.resources = bo_list;
> -               request.number_of_ibs = cnt + !!preamble_cs;
> -               request.ibs = ibs;
> -               request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, 
> queue_idx);
> +       /* Configure the CS request. */
> +       if (initial_preamble_cs)
> +               ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
>
> -               if (preamble_cs) {
> -                       ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
> -               }
> +       for (unsigned i = 0; i < cs_count; i++) {
> +               struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
>
> -               for (unsigned j = 0; j < cnt; ++j) {
> -                       struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i 
> + j]);
> -                       ibs[j + !!preamble_cs] = cs->ib;
> +               ibs[i + !!initial_preamble_cs] = cs->ib;
>
> -                       if (cs->is_chained) {
> -                               *cs->ib_size_ptr -= 4;
> -                               cs->is_chained = false;
> -                       }
> +               if (cs->is_chained) {
> +                       *cs->ib_size_ptr -= 4;
> +                       cs->is_chained = false;
>                 }
> +       }
>
> -               sem_info->cs_emit_signal = (i == cs_count - cnt) ? 
> emit_signal_sem : false;
> -               r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
> -               if (r) {
> -                       if (r == -ENOMEM)
> -                               fprintf(stderr, "amdgpu: Not enough memory 
> for command submission.\n");
> -                       else
> -                               fprintf(stderr, "amdgpu: The CS has been 
> rejected, "
> -                                               "see dmesg for more 
> information.\n");
> -               }
> +       request.ip_type = cs0->hw_ip;
> +       request.ring = queue_idx;
> +       request.resources = bo_list;
> +       request.number_of_ibs = number_of_ibs;
> +       request.ibs = ibs;
> +       request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
>
> -               if (bo_list)
> -                       amdgpu_bo_list_destroy(bo_list);
> +       /* Submit the CS. */
> +       r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
> +       if (r) {
> +               if (r == -ENOMEM)
> +                       fprintf(stderr, "amdgpu: Not enough memory for 
> command submission.\n");
> +               else
> +                       fprintf(stderr, "amdgpu: The CS has been rejected, "
> +                                       "see dmesg for more information.\n");
> +       }
>
> -               if (r)
> -                       return r;
> +       if (bo_list)
> +               amdgpu_bo_list_destroy(bo_list);
> +       free(ibs);
> +
> +       if (r)
> +               return r;
>
> -               i += cnt;
> -       }
>         if (fence)
>                 radv_amdgpu_request_to_fence(ctx, fence, &request);
>
> @@ -1131,7 +1138,7 @@ static int radv_amdgpu_winsys_cs_submit(struct 
> radeon_winsys_ctx *_ctx,
>         if (!cs->ws->use_ib_bos) {
>                 ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, 
> sem_info, bo_list, cs_array,
>                                                            cs_count, 
> initial_preamble_cs, continue_preamble_cs, _fence);
> -       } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && 
> cs->ws->batchchain) {
> +       } else if (can_patch && cs->ws->batchchain) {
>                 ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, 
> sem_info, bo_list, cs_array,
>                                                             cs_count, 
> initial_preamble_cs, continue_preamble_cs, _fence);
>         } else {
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to