On 23.03.2017 19:18, Christian König wrote:
Am 23.03.2017 um 18:22 schrieb Nicolai Hähnle:
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Also, add the fence of the clear operations to the BO to ensure that
the underlying memory can only be re-used after all PTEs pointing to
it have been cleared.

This avoids the following sequence of events that could be triggered
by user space:

1. Submit a CS that accesses some BO _without_ adding that BO to the
    buffer list.
2. Free that BO.
3. Some other task re-uses the memory underlying the BO.
4. The CS is submitted to the hardware and accesses memory that is
    now already in use by somebody else.

By clearing the page tables immediately in step 2, a GPU VM fault will
be triggered in step 4 instead of wild memory accesses.

First of all please split adding the fence parameter to
amdgpu_vm_clear_freed() into a separate patch.

Sure, I'll do that.


Not a must have, but that should make it easier to review.


Signed-off-by: Nicolai Hähnle <nicolai.haeh...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 13 +++++++++++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 20 ++++++++++++++------
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++-
  4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 55d553a..85e6070 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -778,21 +778,21 @@ static int amdgpu_bo_vm_update_pte(struct
amdgpu_cs_parser *p)
      int i, r;
        r = amdgpu_vm_update_page_directory(adev, vm);
      if (r)
          return r;
        r = amdgpu_sync_fence(adev, &p->job->sync,
vm->page_directory_fence);
      if (r)
          return r;
  -    r = amdgpu_vm_clear_freed(adev, vm);
+    r = amdgpu_vm_clear_freed(adev, vm, NULL);
      if (r)
          return r;
        r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
      if (r)
          return r;
        r = amdgpu_sync_fence(adev, &p->job->sync,
                    fpriv->prt_va->last_pt_update);
      if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index be9fb2c..bd2daef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -145,20 +145,21 @@ void amdgpu_gem_object_close(struct
drm_gem_object *obj,
      struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
      struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
      struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
      struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_bo_list_entry vm_pd;
      struct list_head list, duplicates;
      struct ttm_validate_buffer tv;
      struct ww_acquire_ctx ticket;
      struct amdgpu_bo_va *bo_va;
+    struct fence *fence = NULL;
      int r;
        INIT_LIST_HEAD(&list);
      INIT_LIST_HEAD(&duplicates);
        tv.bo = &bo->tbo;
      tv.shared = true;
      list_add(&tv.head, &list);
        amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
@@ -166,23 +167,31 @@ void amdgpu_gem_object_close(struct
drm_gem_object *obj,
      r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
      if (r) {
          dev_err(adev->dev, "leaking bo va because "
              "we fail to reserve bo (%d)\n", r);
          return;
      }
      bo_va = amdgpu_vm_bo_find(vm, bo);
      if (bo_va) {
          if (--bo_va->ref_count == 0) {
              amdgpu_vm_bo_rmv(adev, bo_va);
+
+            amdgpu_vm_clear_freed(adev, vm, &fence);
          }
      }
-    ttm_eu_backoff_reservation(&ticket, &list);
+
+    if (fence) {
+        ttm_eu_fence_buffer_objects(&ticket, &list, fence);
+        fence_put(fence);
+    } else {
+        ttm_eu_backoff_reservation(&ticket, &list);
+    }

Ah, now I remember why we didn't do that before. We could run into
problems because amdgpu_gem_object_close() can't fail and adding this
needs memory.

Anyway, "tv.shared = true;" was there before. So your patch doesn't make
it any worse.

But I suggest using amdgpu_bo_fence() instead of
ttm_eu_fence_buffer_objects(). That way we don't try to add the fence to
the page directory.

I just checked: the fence is added to the page directory anyway, in amdgpu_vm_bo_update_mapping(). I think that's necessary to make sure subsequent CS submissions see the cleared page tables.

Anyway, it still makes sense to remove the call to ttm_eu_fence_buffer_objects here. That's more explicit about who is responsible for adding fences to what.

Thanks,
Nicolai


Apart from that the patch looks good to me,
Christian.

  }
    static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev,
int r)
  {
      if (r == -EDEADLK) {
          r = amdgpu_gpu_reset(adev);
          if (!r)
              r = -EAGAIN;
      }
      return r;
@@ -535,21 +544,21 @@ static void amdgpu_gem_va_update_vm(struct
amdgpu_device *adev,
        r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check,
                        NULL);
      if (r)
          goto error;
        r = amdgpu_vm_update_page_directory(adev, vm);
      if (r)
          goto error;
  -    r = amdgpu_vm_clear_freed(adev, vm);
+    r = amdgpu_vm_clear_freed(adev, vm, NULL);
      if (r)
          goto error;
        if (operation == AMDGPU_VA_OP_MAP ||
          operation == AMDGPU_VA_OP_REPLACE)
          r = amdgpu_vm_bo_update(adev, bo_va, false);
    error:
      if (r && r != -ERESTARTSYS)
          DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dd7df45..b6029ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1397,48 +1397,56 @@ static void amdgpu_vm_prt_fini(struct
amdgpu_device *adev, struct amdgpu_vm *vm)
      }
        kfree(shared);
  }
    /**
   * amdgpu_vm_clear_freed - clear freed BOs in the PT
   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
+ * @fence: optional resulting fence
   *
   * Make sure all freed BOs are cleared in the PT.
   * Returns 0 for success.
   *
   * PTs have to be reserved and mutex must be locked!
   */
  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
-              struct amdgpu_vm *vm)
+              struct amdgpu_vm *vm,
+              struct fence **fence)
  {
      struct amdgpu_bo_va_mapping *mapping;
-    struct fence *fence = NULL;
+    struct fence *f = NULL;
      int r;
        while (!list_empty(&vm->freed)) {
          mapping = list_first_entry(&vm->freed,
              struct amdgpu_bo_va_mapping, list);
          list_del(&mapping->list);
            r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm,
mapping,
-                           0, 0, &fence);
-        amdgpu_vm_free_mapping(adev, vm, mapping, fence);
+                           0, 0, &f);
+        amdgpu_vm_free_mapping(adev, vm, mapping, f);
          if (r) {
-            fence_put(fence);
+            fence_put(f);
              return r;
          }
+    }
  +    if (fence && f) {
+        fence_put(*fence);
+        *fence = f;
+    } else {
+        fence_put(f);
      }
-    fence_put(fence);
+
      return 0;
    }
    /**
   * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT
   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
   *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ff10fa5..9d5a572 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -187,21 +187,22 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
              struct amdgpu_vm *vm,
              uint64_t saddr, uint64_t size);
  int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
                struct amdgpu_sync *sync, struct fence *fence,
                struct amdgpu_job *job);
  int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
  void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
  int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
                      struct amdgpu_vm *vm);
  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
-              struct amdgpu_vm *vm);
+              struct amdgpu_vm *vm,
+              struct fence **fence);
  int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct
amdgpu_vm *vm,
                   struct amdgpu_sync *sync);
  int amdgpu_vm_bo_update(struct amdgpu_device *adev,
              struct amdgpu_bo_va *bo_va,
              bool clear);
  void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
                   struct amdgpu_bo *bo);
  struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
                         struct amdgpu_bo *bo);
  struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,



_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to