If we end up emitting a VM fence, keep GDS and SPM associated with that fence. If not, emit them as part of the IB fence.
Signed-off-by: Alex Deucher <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 14 +++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 47 ++++++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++- 3 files changed, 48 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index f1ed4a436f5b4..d439c68eed9de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -131,6 +131,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, struct amdgpu_fence *af; struct amdgpu_fence *vm_af; bool need_ctx_switch; + bool emit_spm_needed = false; + bool emit_gds_needed = false; struct amdgpu_vm *vm; uint64_t fence_ctx; uint32_t status = 0, alloc_size; @@ -220,7 +222,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, vm_af = job->hw_vm_fence; /* VM sequence */ vm_af->ib_wptr = ring->wptr; - amdgpu_vm_flush(ring, job, need_pipe_sync); + amdgpu_vm_flush(ring, job, need_pipe_sync, &emit_spm_needed, + &emit_gds_needed); vm_af->ib_dw_size = amdgpu_ring_get_dw_distance(ring, vm_af->ib_wptr, ring->wptr); } @@ -232,6 +235,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, if (ring->funcs->insert_start) ring->funcs->insert_start(ring); + if (emit_spm_needed) + adev->gfx.rlc.funcs->update_spm_vmid(adev, ring->xcc_id, ring, job->vmid); + + if (emit_gds_needed) + amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, + job->gds_size, job->gws_base, + job->gws_size, job->oa_base, + job->oa_size); + if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync) ring->funcs->emit_mem_sync(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 13031e4b6f1d5..339902c41fc7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -764,18 +764,22 @@ bool 
amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, * @ring: ring to use for flush * @job: related job * @need_pipe_sync: is pipe sync needed + * @emit_spm_needed: does the caller need to emit spm + * @emit_gds_needed: does the caller need to emit gds * * Emit a VM flush when it is necessary. */ void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, - bool need_pipe_sync) + bool need_pipe_sync, bool *emit_spm_needed, + bool *emit_gds_needed) { struct amdgpu_device *adev = ring->adev; struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; - bool spm_update_needed = job->spm_update_needed; + bool spm_update_needed = adev->gfx.rlc.funcs->update_spm_vmid && + job->spm_update_needed; bool gds_switch_needed = ring->funcs->emit_gds_switch && job->gds_switch_needed; bool vm_flush_needed = job->vm_needs_flush; @@ -783,6 +787,7 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool pasid_mapping_needed = false; struct dma_fence *fence = NULL; unsigned int patch = 0; + bool emit_fence; if (amdgpu_vmid_had_gpu_reset(adev, id)) { gds_switch_needed = true; @@ -798,6 +803,7 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, mutex_unlock(&id_mgr->lock); gds_switch_needed &= !!ring->funcs->emit_gds_switch; + spm_update_needed &= !!adev->gfx.rlc.funcs->update_spm_vmid; vm_flush_needed &= !!ring->funcs->emit_vm_flush && job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && @@ -808,6 +814,17 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, ring->funcs->emit_cleaner_shader && job->base.s_fence && &job->base.s_fence->scheduled == isolation->spearhead; + emit_fence = !!(vm_flush_needed || pasid_mapping_needed || + cleaner_shader_needed); + + *emit_spm_needed = spm_update_needed; + if 
(spm_update_needed && emit_fence) + *emit_spm_needed = false; + + *emit_gds_needed = gds_switch_needed; + if (gds_switch_needed && emit_fence) + *emit_gds_needed = false; + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && !cleaner_shader_needed && !spm_update_needed) return; @@ -842,21 +859,21 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (pasid_mapping_needed) amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); - if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, ring->xcc_id, ring, job->vmid); + if (emit_fence) { + if (spm_update_needed) + adev->gfx.rlc.funcs->update_spm_vmid(adev, ring->xcc_id, ring, job->vmid); - if (ring->funcs->emit_gds_switch && - gds_switch_needed) { - amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, - job->gds_size, job->gws_base, - job->gws_size, job->oa_base, - job->oa_size); - } + if (gds_switch_needed) + amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, + job->gds_size, job->gws_base, + job->gws_size, job->oa_base, + job->oa_size); - amdgpu_fence_emit(ring, job->hw_vm_fence, 0); - fence = &job->hw_vm_fence->base; - /* get a ref for the job */ - dma_fence_get(fence); + amdgpu_fence_emit(ring, job->hw_vm_fence, 0); + fence = &job->hw_vm_fence->base; + /* get a ref for the job */ + dma_fence_get(fence); + } if (vm_flush_needed) { mutex_lock(&id_mgr->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index cc096c005e348..b67eeec464e09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -514,7 +514,9 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); -void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); +void amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job, + bool need_pipe_sync, bool *emit_spm_needed, + bool *emit_gds_needed); int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, -- 2.54.0
