On 05.04.25 at 09:32, SRINIVASAN SHANMUGAM wrote:
>
> On 4/4/2025 7:46 PM, Christian König wrote:
>> This reverts commit c2cc3648ba517a6c270500b5447d5a1efdad5936. It turned
>> out that this has negative consequences for some workloads. Instead,
>> check directly whether the cleaner shader should run.
>>
>> While at it, remove amdgpu_vm_need_pipeline_sync(); we also check again
>> if the VMID has seen a GPU reset since last use, and the gds switch
>> setting can be handled more simply as well.
>>
>> Also remove some duplicate checks and re-order and document the code.
>>
>> v2: restructure the whole function
>> v3: fix logic error pointed out by Srini
>>
>> Signed-off-by: Christian König <[email protected]>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  3 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 92 +++++++++-----------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 +-
>>   3 files changed, 36 insertions(+), 64 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index 802743efa3b3..ff2ca984279a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -191,8 +191,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
>> unsigned int num_ibs,
>>       need_ctx_switch = ring->current_ctx != fence_ctx;
>>       if (ring->funcs->emit_pipeline_sync && job &&
>>           ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
>> -         need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
>> -
>> +         (amdgpu_sriov_vf(adev) && need_ctx_switch))) {
>>           need_pipe_sync = true;
>>             if (tmp)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index b5ddfcbbc9fc..e6d7db8d40cd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -596,37 +596,6 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device 
>> *adev)
>>       }
>>   }
>>   -/**
>> - * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
>> - *
>> - * @ring: ring on which the job will be submitted
>> - * @job: job to submit
>> - *
>> - * Returns:
>> - * True if sync is needed.
>> - */
>> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>> -                  struct amdgpu_job *job)
>> -{
>> -    struct amdgpu_device *adev = ring->adev;
>> -    unsigned vmhub = ring->vm_hub;
>> -    struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>> -
>> -    if (job->vmid == 0)
>> -        return false;
>> -
>> -    if (job->vm_needs_flush || ring->has_compute_vm_bug)
>> -        return true;
>> -
>> -    if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
>> -        return true;
>> -
>> -    if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
>> -        return true;
>> -
>> -    return false;
>> -}
>> -
>>   /**
>>    * amdgpu_vm_flush - hardware flush the vm
>>    *
>> @@ -647,29 +616,31 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
>> amdgpu_job *job,
>>       unsigned vmhub = ring->vm_hub;
>>       struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>       struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
>> -    bool spm_update_needed = job->spm_update_needed;
>> -    bool gds_switch_needed = ring->funcs->emit_gds_switch &&
>> -        job->gds_switch_needed;
>> -    bool vm_flush_needed = job->vm_needs_flush;
>> -    bool cleaner_shader_needed = false;
>> -    bool pasid_mapping_needed = false;
>> -    struct dma_fence *fence = NULL;
>> +    bool gds_switch_needed, vm_flush_needed, spm_update_needed,
>> +         cleaner_shader_needed, pasid_mapping_needed;
>> +    struct dma_fence *fence;
>>       unsigned int patch;
>>       int r;
>>   +    /* First of all figure out what needs to be done */
>>       if (amdgpu_vmid_had_gpu_reset(adev, id)) {
>> +        need_pipe_sync = true;
>>           gds_switch_needed = true;
>>           vm_flush_needed = true;
>>           pasid_mapping_needed = true;
>>           spm_update_needed = true;
>> +    } else {
>> +        need_pipe_sync |= ring->has_compute_vm_bug;
>> +        gds_switch_needed = job->gds_switch_needed;
>
>
> *[1]:* Should we also check "ring->funcs->emit_gds_switch" here,
> i.e. "ring->funcs->emit_gds_switch && job->gds_switch_needed;"?

No, see below.

>
>
>> +        vm_flush_needed = job->vm_needs_flush;
>> +        mutex_lock(&id_mgr->lock);
>> +        if (id->pasid != job->pasid || !id->pasid_mapping ||
>> +            !dma_fence_is_signaled(id->pasid_mapping))
>> +            pasid_mapping_needed = true;
>> +        mutex_unlock(&id_mgr->lock);
>> +        spm_update_needed = job->spm_update_needed;
>>       }
>>   -    mutex_lock(&id_mgr->lock);
>> -    if (id->pasid != job->pasid || !id->pasid_mapping ||
>> -        !dma_fence_is_signaled(id->pasid_mapping))
>> -        pasid_mapping_needed = true;
>> -    mutex_unlock(&id_mgr->lock);
>> -
>>       gds_switch_needed &= !!ring->funcs->emit_gds_switch;

That check is here.
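
In code terms (a simplified excerpt of the patch, with the branch
bodies elided):

    /* the branches only record whether a switch is wanted... */
    gds_switch_needed = job->gds_switch_needed;

    /* ...and the ring capability is masked in once, covering the
     * GPU reset path as well: */
    gds_switch_needed &= !!ring->funcs->emit_gds_switch;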

>>       vm_flush_needed &= !!ring->funcs->emit_vm_flush  &&
>>               job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
>> @@ -681,15 +652,17 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
>> amdgpu_job *job,
>>           &job->base.s_fence->scheduled == isolation->spearhead;
>>         if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
>> -        !cleaner_shader_needed)
>> +        !cleaner_shader_needed && !spm_update_needed)
>>           return 0;
>>   +    /* Then aktually prepare the submission frame */
>
>
> *[2]:* Could you please correct the typo?

Mhm, I thought I already did that.

Ok, going to fix it now.

>
>
>>       amdgpu_ring_ib_begin(ring);
>>       if (ring->funcs->init_cond_exec)
>>           patch = amdgpu_ring_init_cond_exec(ring,
>>                              ring->cond_exe_gpu_addr);
>>   -    if (need_pipe_sync)
>> +    if (need_pipe_sync || vm_flush_needed || cleaner_shader_needed ||
>> +        gds_switch_needed)
>
>
> *[3]:* Can we also check *spm_update_needed* alongside the other
> conditions that might need a pipeline sync here? In an environment where
> multiple jobs might access or modify shared data (especially in gang
> submissions), all operations (including SPM updates) should be completed
> before further commands execute, to prevent the use of stale or invalid
> data.

No, the SPM update is intentionally left out here since it shouldn't
interfere with the pipeline sync.
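
Simplified excerpt of the resulting structure in the patch:

    /* spm_update_needed still prevents the early exit... */
    if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
        !cleaner_shader_needed && !spm_update_needed)
            return 0;

    ...

    /* ...but only the operations which actually touch the pipeline
     * trigger the sync: */
    if (need_pipe_sync || vm_flush_needed || cleaner_shader_needed ||
        gds_switch_needed)
            amdgpu_ring_emit_pipeline_sync(ring);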

Regards,
Christian.

>
>
>>           amdgpu_ring_emit_pipeline_sync(ring);
>>         if (cleaner_shader_needed)
>> @@ -706,20 +679,31 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
>> amdgpu_job *job,
>>       if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
>>           adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
>>   -    if (ring->funcs->emit_gds_switch &&
>> -        gds_switch_needed) {
>> +    if (gds_switch_needed)
>>           amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
>>                           job->gds_size, job->gws_base,
>>                           job->gws_size, job->oa_base,
>>                           job->oa_size);
>> -    }
>>         if (vm_flush_needed || pasid_mapping_needed || 
>> cleaner_shader_needed) {
>>           r = amdgpu_fence_emit(ring, &fence, NULL, 0);
>>           if (r)
>>               return r;
>> +    } else {
>> +        fence = NULL;
>> +    }
>> +
>> +    amdgpu_ring_patch_cond_exec(ring, patch);
>> +
>> +    /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>> +    if (ring->funcs->emit_switch_buffer) {
>> +        amdgpu_ring_emit_switch_buffer(ring);
>> +        amdgpu_ring_emit_switch_buffer(ring);
>>       }
>>   +    amdgpu_ring_ib_end(ring);
>> +
>> +    /* And finally remember what the ring has executed */
>>       if (vm_flush_needed) {
>>           mutex_lock(&id_mgr->lock);
>>           dma_fence_put(id->last_flush);
>> @@ -749,16 +733,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
>> amdgpu_job *job,
>>           mutex_unlock(&adev->enforce_isolation_mutex);
>>       }
>>       dma_fence_put(fence);
>> -
>> -    amdgpu_ring_patch_cond_exec(ring, patch);
>> -
>> -    /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>> -    if (ring->funcs->emit_switch_buffer) {
>> -        amdgpu_ring_emit_switch_buffer(ring);
>> -        amdgpu_ring_emit_switch_buffer(ring);
>> -    }
>> -
>> -    amdgpu_ring_ib_end(ring);
>>       return 0;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index daa2f9b33620..e9ecdb96bafa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -493,7 +493,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, 
>> struct amdgpu_vm *vm,
>>                  struct ww_acquire_ctx *ticket,
>>                  int (*callback)(void *p, struct amdgpu_bo *bo),
>>                  void *param);
>> -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool 
>> need_pipe_sync);
>> +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>> +            bool need_pipe_sync);
>>   int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>>                 struct amdgpu_vm *vm, bool immediate);
>>   int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>> @@ -550,8 +551,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, 
>> uint32_t min_vm_size,
>>                  uint32_t fragment_size_default, unsigned max_level,
>>                  unsigned max_bits);
>>   int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file 
>> *filp);
>> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>> -                  struct amdgpu_job *job);
>>   void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
>>     struct amdgpu_task_info *
