issue:
Game F1 performance drops by 13% when per-VM BOs are enabled.

root cause:
If some BOs fall back to their allowed domain, they will never be re-validated
unless an eviction happens,
which means they stay in the allowed domain indefinitely.

Fix:
Maintain a per-VM list of BOs that ended up in their allowed domain, then try
to validate them into their preferred domain.

Change-Id: I4335470bf867b46ac93c8e2531eac3f8ba9ac2da
Signed-off-by: Chunming Zhou <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 15 +++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 49 ++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  7 ++++-
 3 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 383bf2d31c92..7509b6bd2047 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -359,7 +359,7 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device 
*adev, u64 num_bytes,
 }
 
 static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
-                                struct amdgpu_bo *bo)
+                                struct amdgpu_bo *bo, bool *allowed)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        struct ttm_operation_ctx ctx = {
@@ -374,6 +374,8 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
        if (bo->pin_count)
                return 0;
 
+       if (allowed)
+               *allowed = false;
        /* Don't move this buffer if we have depleted our allowance
         * to move it. Don't move anything if the threshold is zero.
         */
@@ -396,6 +398,9 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
        }
 
 retry:
+       if (domain != bo->preferred_domains && domain == bo->allowed_domains &&
+           allowed)
+               *allowed = true;
        amdgpu_ttm_placement_from_domain(bo, domain);
        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 
@@ -479,19 +484,19 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser 
*p,
        return false;
 }
 
-static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
+static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo, bool *allowed)
 {
        struct amdgpu_cs_parser *p = param;
        int r;
 
        do {
-               r = amdgpu_cs_bo_validate(p, bo);
+               r = amdgpu_cs_bo_validate(p, bo, allowed);
        } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
        if (r)
                return r;
 
        if (bo->shadow)
-               r = amdgpu_cs_bo_validate(p, bo->shadow);
+               r = amdgpu_cs_bo_validate(p, bo->shadow, NULL);
 
        return r;
 }
@@ -528,7 +533,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser 
*p,
                if (p->evictable == lobj)
                        p->evictable = NULL;
 
-               r = amdgpu_cs_validate(p, bo);
+               r = amdgpu_cs_validate(p, bo, NULL);
                if (r)
                        return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e9a41dd05345..365e8dca05f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -186,6 +186,35 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
        list_add(&entry->tv.head, validated);
 }
 
+static int amdgpu_vm_try_validate_allowed(struct amdgpu_device *adev,
+                                         struct amdgpu_vm *vm,
+                                         int (*validate)(void *p,
+                                                         struct amdgpu_bo *bo,
+                                                         bool *allowed),
+                                         void *param)
+{
+       struct amdgpu_vm_bo_base *bo_base, *tmp;
+       int r;
+       bool allowed;
+
+       spin_lock(&vm->status_lock);
+       list_for_each_entry_safe(bo_base, tmp, &vm->allowed_domain,
+                                allowed_domain_list) {
+               spin_unlock(&vm->status_lock);
+               r = validate(param, bo_base->bo, &allowed);
+               if (r)
+                       return r;
+               spin_lock(&vm->status_lock);
+               if (!allowed)
+                       list_del_init(&bo_base->allowed_domain_list);
+               if (bo_base->bo->tbo.type != ttm_bo_type_kernel)
+                       list_move(&bo_base->vm_status, &vm->moved);
+               else
+                       list_move(&bo_base->vm_status, &vm->relocated);
+       }
+       spin_unlock(&vm->status_lock);
+       return 0;
+}
 /**
  * amdgpu_vm_validate_pt_bos - validate the page table BOs
  *
@@ -197,16 +226,19 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
  * Validate the page table BOs on command submission if neccessary.
  */
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-                             int (*validate)(void *p, struct amdgpu_bo *bo),
+                             int (*validate)(void *p, struct amdgpu_bo *bo,
+                                             bool *allowed),
                              void *param)
 {
        struct ttm_bo_global *glob = adev->mman.bdev.glob;
+       LIST_HEAD(tmp_allowed);
        int r;
 
        spin_lock(&vm->status_lock);
        while (!list_empty(&vm->evicted)) {
                struct amdgpu_vm_bo_base *bo_base;
                struct amdgpu_bo *bo;
+               bool allowed = false;
 
                bo_base = list_first_entry(&vm->evicted,
                                           struct amdgpu_vm_bo_base,
@@ -216,7 +248,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
                bo = bo_base->bo;
                BUG_ON(!bo);
                if (bo->parent) {
-                       r = validate(param, bo);
+                       r = validate(param, bo, &allowed);
                        if (r)
                                return r;
 
@@ -235,6 +267,10 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
                }
 
                spin_lock(&vm->status_lock);
+               if (allowed)
+                       list_add_tail(&bo_base->allowed_domain_list,
+                                     &tmp_allowed);
+
                if (bo->tbo.type != ttm_bo_type_kernel)
                        list_move(&bo_base->vm_status, &vm->moved);
                else
@@ -242,6 +278,12 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
        }
        spin_unlock(&vm->status_lock);
 
+       r = amdgpu_vm_try_validate_allowed(adev, vm, validate, param);
+       if (r)
+               return r;
+       spin_lock(&vm->status_lock);
+       list_splice_init(&tmp_allowed, &vm->allowed_domain);
+       spin_unlock(&vm->status_lock);
        return 0;
 }
 
@@ -1868,6 +1910,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct 
amdgpu_device *adev,
        bo_va->base.bo = bo;
        INIT_LIST_HEAD(&bo_va->base.bo_list);
        INIT_LIST_HEAD(&bo_va->base.vm_status);
+       INIT_LIST_HEAD(&bo_va->base.allowed_domain_list);
 
        bo_va->ref_count = 1;
        INIT_LIST_HEAD(&bo_va->valids);
@@ -2237,6 +2280,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 
        spin_lock(&vm->status_lock);
        list_del(&bo_va->base.vm_status);
+       list_del(&bo_va->base.allowed_domain_list);
        spin_unlock(&vm->status_lock);
 
        list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
@@ -2409,6 +2453,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
        for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
                vm->reserved_vmid[i] = NULL;
        spin_lock_init(&vm->status_lock);
+       INIT_LIST_HEAD(&vm->allowed_domain);
        INIT_LIST_HEAD(&vm->evicted);
        INIT_LIST_HEAD(&vm->relocated);
        INIT_LIST_HEAD(&vm->moved);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index cf2c667ee538..54c39d3ea6d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -149,6 +149,7 @@ struct amdgpu_vm_bo_base {
 
        /* protected by spinlock */
        struct list_head                vm_status;
+       struct list_head                allowed_domain_list;
 
        /* protected by the BO being reserved */
        bool                            moved;
@@ -177,6 +178,9 @@ struct amdgpu_vm {
        /* protecting invalidated */
        spinlock_t              status_lock;
 
+       /* per vm bo is validated to allowed domain */
+       struct list_head        allowed_domain;
+
        /* BOs who needs a validation */
        struct list_head        evicted;
 
@@ -266,7 +270,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
                         struct amdgpu_bo_list_entry *entry);
 bool amdgpu_vm_ready(struct amdgpu_vm *vm);
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-                             int (*callback)(void *p, struct amdgpu_bo *bo),
+                             int (*callback)(void *p, struct amdgpu_bo *bo,
+                                             bool *allowed),
                              void *param);
 int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
                        struct amdgpu_vm *vm,
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to