If we end up emitting a VM fence, keep the GDS switch and the
SPM VMID update associated with that fence.  If not, emit them
as part of the IB fence.

Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 14 +++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 47 ++++++++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 ++-
 3 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index f1ed4a436f5b4..d439c68eed9de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -131,6 +131,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
int num_ibs,
        struct amdgpu_fence *af;
        struct amdgpu_fence *vm_af;
        bool need_ctx_switch;
+       bool emit_spm_needed = false;
+       bool emit_gds_needed = false;
        struct amdgpu_vm *vm;
        uint64_t fence_ctx;
        uint32_t status = 0, alloc_size;
@@ -220,7 +222,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
int num_ibs,
                vm_af = job->hw_vm_fence;
                /* VM sequence */
                vm_af->ib_wptr = ring->wptr;
-               amdgpu_vm_flush(ring, job, need_pipe_sync);
+               amdgpu_vm_flush(ring, job, need_pipe_sync, &emit_spm_needed,
+                               &emit_gds_needed);
                vm_af->ib_dw_size =
                        amdgpu_ring_get_dw_distance(ring, vm_af->ib_wptr, 
ring->wptr);
        }
@@ -232,6 +235,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
int num_ibs,
        if (ring->funcs->insert_start)
                ring->funcs->insert_start(ring);
 
+       if (emit_spm_needed)
+               adev->gfx.rlc.funcs->update_spm_vmid(adev, ring->xcc_id, ring, 
job->vmid);
+
+       if (emit_gds_needed)
+               amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
+                                           job->gds_size, job->gws_base,
+                                           job->gws_size, job->oa_base,
+                                           job->oa_size);
+
        if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && 
ring->funcs->emit_mem_sync)
                ring->funcs->emit_mem_sync(ring);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 13031e4b6f1d5..339902c41fc7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -764,18 +764,22 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring 
*ring,
  * @ring: ring to use for flush
  * @job:  related job
  * @need_pipe_sync: is pipe sync needed
+ * @emit_spm_needed: set to true if the caller must emit the SPM VMID update
+ * @emit_gds_needed: set to true if the caller must emit the GDS switch
  *
  * Emit a VM flush when it is necessary.
  */
 void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
-                    bool need_pipe_sync)
+                    bool need_pipe_sync, bool *emit_spm_needed,
+                    bool *emit_gds_needed)
 {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
        unsigned vmhub = ring->vm_hub;
        struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
        struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
-       bool spm_update_needed = job->spm_update_needed;
+       bool spm_update_needed = adev->gfx.rlc.funcs->update_spm_vmid &&
+               job->spm_update_needed;
        bool gds_switch_needed = ring->funcs->emit_gds_switch &&
                job->gds_switch_needed;
        bool vm_flush_needed = job->vm_needs_flush;
@@ -783,6 +787,7 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
        bool pasid_mapping_needed = false;
        struct dma_fence *fence = NULL;
        unsigned int patch = 0;
+       bool emit_fence;
 
        if (amdgpu_vmid_had_gpu_reset(adev, id)) {
                gds_switch_needed = true;
@@ -798,6 +803,7 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
        mutex_unlock(&id_mgr->lock);
 
        gds_switch_needed &= !!ring->funcs->emit_gds_switch;
+       spm_update_needed &= !!adev->gfx.rlc.funcs->update_spm_vmid;
        vm_flush_needed &= !!ring->funcs->emit_vm_flush  &&
                        job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
        pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
@@ -808,6 +814,17 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
                ring->funcs->emit_cleaner_shader && job->base.s_fence &&
                &job->base.s_fence->scheduled == isolation->spearhead;
 
+       emit_fence = !!(vm_flush_needed || pasid_mapping_needed ||
+                       cleaner_shader_needed);
+
+       *emit_spm_needed = spm_update_needed;
+       if (spm_update_needed && emit_fence)
+               *emit_spm_needed = false;
+
+       *emit_gds_needed = gds_switch_needed;
+       if (gds_switch_needed && emit_fence)
+               *emit_gds_needed = false;
+
        if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
            !cleaner_shader_needed && !spm_update_needed)
                return;
@@ -842,21 +859,21 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
        if (pasid_mapping_needed)
                amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 
-       if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
-               adev->gfx.rlc.funcs->update_spm_vmid(adev, ring->xcc_id, ring, 
job->vmid);
+       if (emit_fence) {
+               if (spm_update_needed)
+                       adev->gfx.rlc.funcs->update_spm_vmid(adev, 
ring->xcc_id, ring, job->vmid);
 
-       if (ring->funcs->emit_gds_switch &&
-           gds_switch_needed) {
-               amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
-                                           job->gds_size, job->gws_base,
-                                           job->gws_size, job->oa_base,
-                                           job->oa_size);
-       }
+               if (gds_switch_needed)
+                       amdgpu_ring_emit_gds_switch(ring, job->vmid, 
job->gds_base,
+                                                   job->gds_size, 
job->gws_base,
+                                                   job->gws_size, job->oa_base,
+                                                   job->oa_size);
 
-       amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
-       fence = &job->hw_vm_fence->base;
-       /* get a ref for the job */
-       dma_fence_get(fence);
+               amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
+               fence = &job->hw_vm_fence->base;
+               /* get a ref for the job */
+               dma_fence_get(fence);
+       }
 
        if (vm_flush_needed) {
                mutex_lock(&id_mgr->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index cc096c005e348..b67eeec464e09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -514,7 +514,9 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
                       struct ww_acquire_ctx *ticket,
                       int (*callback)(void *p, struct amdgpu_bo *bo),
                       void *param);
-void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool 
need_pipe_sync);
+void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
+                    bool need_pipe_sync, bool *emit_spm_needed,
+                    bool *emit_gds_needed);
 int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
                          struct amdgpu_vm *vm, bool immediate);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
-- 
2.54.0

Reply via email to