[Public] This change is based on Linux 6.19-rc7
Skip the expensive per-BO hmm_range_fault page table walk if no MMU invalidation has occurred since the last successful submit. The fast path is a single seq counter compare instead of the full HMM walk. On the first submit the slow path runs and caches notifier_seq into bo->last_valid_notifier_seq. Subsequent submits compare against the cached seq to take the fast path. If an MMU notifier fires, the seq changes and the slow path runs again on the affected BOs. Signed-off-by: Wang, Beyond <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 25 +++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 +++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ecdfe6cb36cc..140346e9cb92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -891,6 +891,17 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, bool userpage_invalidated = false; struct amdgpu_bo *bo = e->bo; + /* + * Skip the expensive HMM page table walk if no MMU invalidation occurred + * since last successful submit + */ + if (bo->last_valid_notifier_seq && + !mmu_interval_read_retry(&bo->notifier, bo->last_valid_notifier_seq)) { + e->range = NULL; + e->user_invalidated = false; + continue; + } + e->range = amdgpu_hmm_range_alloc(NULL); if (unlikely(!e->range)) return -ENOMEM; @@ -1326,7 +1337,19 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, */ r = 0; amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - r |= !amdgpu_hmm_range_valid(e->range); + struct amdgpu_bo *bo = e->bo; + + if (!e->range) { + if (mmu_interval_read_retry(&bo->notifier, bo->last_valid_notifier_seq)) + r = 1; + continue; + } + + if (!amdgpu_hmm_range_valid(e->range)) + r = 1; + else + bo->last_valid_notifier_seq = e->range->hmm_range.notifier_seq; + amdgpu_hmm_range_free(e->range); e->range = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 52c2d1731aab..89dc9ee1176e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -115,6 +115,14 @@ struct amdgpu_bo { #ifdef CONFIG_MMU_NOTIFIER struct mmu_interval_notifier notifier; + + /* + * Cached notifier_seq from last successful CS submit. + * Used to skip the expensive HMM page table walk when + * no MMU invalidation has occurred since last validation. + * Zero means never validated (always takes the slow path). + */ + unsigned long last_valid_notifier_seq; #endif struct kgd_mem *kfd_bo; -- 2.43.0 Thanks, Beyond
