> -----Original Message-----
> From: amd-gfx [mailto:[email protected]] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:39 AM
> To: [email protected]
> Cc: Liu, Monk
> Subject: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme
> 
> 1) Adapt to vulkan:
> Now use double SWITCH BUFFER to replace the 128 nops w/a,
> because when vulkan is introduced, the umd can insert 7 ~ 16 IBs
> per submit, which means 256 DW size cannot hold the whole
> DMAframe (if we still insert those 128 nops), CP team suggests
> use double SWITCH_BUFFERs, instead of tricky 128 NOPs w/a.
> 
> 2) To fix the CE VM fault issue when MCBP introduced:
> Need one more COND_EXEC wrapping IB part (original one is
> for VM switch part).
> 
> this change can fix vm fault issue caused by below scenario
> without this change:
> 
> >CE passed original COND_EXEC (no MCBP issued this moment),
>  proceed as normal.
> 
> >DE catches up to this COND_EXEC, but this time MCBP issued,
>  thus DE treats all following packages as NOP. The following
>  VM switch packages now look just like NOPs to DE, so DE
>  doesn't do VM flush at all.
> 
> >Now CE proceeds to the first IBc, and triggers VM fault,
>  because DE didn't do VM flush for this DMAframe.
> 
> 3) change estimated alloc size for gfx9.
> with the new DMAframe scheme, we need to modify emit_frame_size
> for gfx9
> 
> with above changes, no more 128 NOPs w/a after VM flush
> 
> Change-Id: Ib3f92d9d5a81bfff0369a00f23e1e5891797089a
> Signed-off-by: Monk Liu <[email protected]>

I haven't really kept up on this whole saga with the DE and CE so assuming 
there are no regressions on bare metal, patches 12, 13 are:
Acked-by: Alex Deucher <[email protected]>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77
> +++++++++++++++++++++-------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 29 ++++++++-----
>  3 files changed, 69 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index d103270..b300929 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -167,9 +167,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring,
> unsigned num_ibs,
>               return r;
>       }
> 
> -     if (ring->funcs->init_cond_exec)
> -             patch_offset = amdgpu_ring_init_cond_exec(ring);
> -
>       if (vm) {
>               amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go
> too fast than DE */
> 
> @@ -180,7 +177,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring,
> unsigned num_ibs,
>               }
>       }
> 
> -     if (ring->funcs->emit_hdp_flush
> +     if (ring->funcs->init_cond_exec)
> +             patch_offset = amdgpu_ring_init_cond_exec(ring);
> +
> +             if (ring->funcs->emit_hdp_flush
>  #ifdef CONFIG_X86_64
>           && !(adev->flags & AMD_IS_APU)
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9ff445c..74be4fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -483,42 +483,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
> struct amdgpu_job *job)
>               id->oa_size != job->oa_size);
>       int r;
> 
> -     if (ring->funcs->emit_pipeline_sync && (
> -         job->vm_needs_flush || gds_switch_needed ||
> -         amdgpu_vm_ring_has_compute_vm_bug(ring)))
> -             amdgpu_ring_emit_pipeline_sync(ring);
> +     if (job->vm_needs_flush || gds_switch_needed ||
> +             amdgpu_vm_is_gpu_reset(adev, id) ||
> +             amdgpu_vm_ring_has_compute_vm_bug(ring)) {
> +             unsigned patch_offset = 0;
> 
> -     if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
> -         amdgpu_vm_is_gpu_reset(adev, id))) {
> -             struct fence *fence;
> -             u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job-
> >vm_pd_addr);
> +             if (ring->funcs->init_cond_exec)
> +                     patch_offset = amdgpu_ring_init_cond_exec(ring);
> 
> -             trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
> -             amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
> +             if (ring->funcs->emit_pipeline_sync &&
> +                     (job->vm_needs_flush || gds_switch_needed ||
> +                     amdgpu_vm_ring_has_compute_vm_bug(ring)))
> +                     amdgpu_ring_emit_pipeline_sync(ring);
> 
> -             r = amdgpu_fence_emit(ring, &fence);
> -             if (r)
> -                     return r;
> +             if (ring->funcs->emit_vm_flush && (job->vm_needs_flush
> ||
> +                     amdgpu_vm_is_gpu_reset(adev, id))) {
> +                     struct fence *fence;
> +                     u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev,
> job->vm_pd_addr);
> 
> -             mutex_lock(&adev->vm_manager.lock);
> -             fence_put(id->last_flush);
> -             id->last_flush = fence;
> -             mutex_unlock(&adev->vm_manager.lock);
> -     }
> +                     trace_amdgpu_vm_flush(pd_addr, ring->idx, job-
> >vm_id);
> +                     amdgpu_ring_emit_vm_flush(ring, job->vm_id,
> pd_addr);
> 
> -     if (gds_switch_needed) {
> -             id->gds_base = job->gds_base;
> -             id->gds_size = job->gds_size;
> -             id->gws_base = job->gws_base;
> -             id->gws_size = job->gws_size;
> -             id->oa_base = job->oa_base;
> -             id->oa_size = job->oa_size;
> -             amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> -                                         job->gds_base, job->gds_size,
> -                                         job->gws_base, job->gws_size,
> -                                         job->oa_base, job->oa_size);
> -     }
> +                     r = amdgpu_fence_emit(ring, &fence);
> +                     if (r)
> +                             return r;
> 
> +                     mutex_lock(&adev->vm_manager.lock);
> +                     fence_put(id->last_flush);
> +                     id->last_flush = fence;
> +                     mutex_unlock(&adev->vm_manager.lock);
> +             }
> +
> +             if (gds_switch_needed) {
> +                     id->gds_base = job->gds_base;
> +                     id->gds_size = job->gds_size;
> +                     id->gws_base = job->gws_base;
> +                     id->gws_size = job->gws_size;
> +                     id->oa_base = job->oa_base;
> +                     id->oa_size = job->oa_size;
> +                     amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> +                                                     job->gds_base, job-
> >gds_size,
> +                                                     job->gws_base, job-
> >gws_size,
> +                                                     job->oa_base, job-
> >oa_size);
> +             }
> +
> +             if (ring->funcs->patch_cond_exec)
> +                     amdgpu_ring_patch_cond_exec(ring, patch_offset);
> +
> +             /* the double SWITCH_BUFFER here *cannot* be skipped by
> COND_EXEC */
> +             if (ring->funcs->emit_switch_buffer) {
> +                     amdgpu_ring_emit_switch_buffer(ring);
> +                     amdgpu_ring_emit_switch_buffer(ring);
> +             }
> +     }
>       return 0;
>  }
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ce6fa03..eb8551b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3134,8 +3134,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct
> amdgpu_ring *ring,
>               /* sync PFP to ME, otherwise we might get invalid PFP reads
> */
>               amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME,
> 0));
>               amdgpu_ring_write(ring, 0x0);
> -             /* Emits 128 dw nop to prevent CE access VM before
> vm_flush finish */
> -             amdgpu_ring_insert_nop(ring, 128);
>       }
>  }
> 
> @@ -3629,15 +3627,24 @@ static const struct amdgpu_ring_funcs
> gfx_v9_0_ring_funcs_gfx = {
>       .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
>       .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
>       .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
> -     .emit_frame_size =
> -             20 + /* gfx_v9_0_ring_emit_gds_switch */
> -             7 + /* gfx_v9_0_ring_emit_hdp_flush */
> -             5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
> -             8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence,
> vm fence */
> -             7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> -             128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */
> -             2 + /* gfx_v9_ring_emit_sb */
> -             3, /* gfx_v9_ring_emit_cntxcntl */
> +     .emit_frame_size = /* totally 242 maximum if 16 IBs */
> +             5 +  /* COND_EXEC */
> +             7 +  /* PIPELINE_SYNC */
> +             46 + /* VM_FLUSH */
> +             8 +  /* FENCE for VM_FLUSH */
> +             20 + /* GDS switch */
> +             4 + /* double SWITCH_BUFFER,
> +                    the first COND_EXEC jump to the place just
> +                        prior to this double SWITCH_BUFFER  */
> +             5 + /* COND_EXEC */
> +             7 +      /*     HDP_flush */
> +             4 +      /*     VGT_flush */
> +             14 + /* CE_META */
> +             31 + /* DE_META */
> +             3 + /* CNTX_CTRL */
> +             5 + /* HDP_INVL */
> +             8 + 8 + /* FENCE x2 */
> +             2, /* SWITCH_BUFFER */
>       .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
>       .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
>       .emit_fence = gfx_v9_0_ring_emit_fence,
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to