To address the problem with hitches moving when bulk move
sublists are lru-bumped, register the list cursors with the
ttm_lru_bulk_move structure when traversing its list, and
when lru-bumping the list, move the cursor hitch to the tail.
This also means it's mandatory for drivers to call
ttm_lru_bulk_move_init() and ttm_lru_bulk_move_fini() when
initializing and finalizing the bulk move structure, so add
those calls to the amdgpu- and xe driver.
Compared to v1 this is slightly more code but less fragile
and hopefully easier to understand.
v2:
- Completely rework the functionality
v3:
- Avoid a NULL pointer dereference assigning manager->mem_type
Cc: Christian König
Cc: Somalapuram Amaranath
Cc:
Signed-off-by: Thomas Hellström
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++
drivers/gpu/drm/ttm/ttm_resource.c | 90 +-
drivers/gpu/drm/xe/xe_vm.c | 4 ++
include/drm/ttm/ttm_device.h | 5 ++
include/drm/ttm/ttm_resource.h | 55 ++--
5 files changed, 137 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ed4a8c5d26d7..7c2ee5d12bc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2264,6 +2264,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct
amdgpu_vm *vm,
if (r)
return r;
+ ttm_lru_bulk_move_init(>lru_bulk_move);
+
vm->pte_support_ats = false;
vm->is_compute_context = false;
@@ -2324,6 +2326,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct
amdgpu_vm *vm,
error_free_delayed:
dma_fence_put(vm->last_tlb_flush);
dma_fence_put(vm->last_unlocked);
+ ttm_lru_bulk_move_fini(>mman.bdev, >lru_bulk_move);
amdgpu_vm_fini_entities(vm);
return r;
@@ -2497,6 +2500,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct
amdgpu_vm *vm)
}
}
+ ttm_lru_bulk_move_fini(>mman.bdev, >lru_bulk_move);
}
/**
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c
b/drivers/gpu/drm/ttm/ttm_resource.c
index 971014fca10a..0acd4bf764b2 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -32,6 +32,49 @@
#include
+/* Detach the cursor from the bulk move list*/
+static void
+ttm_resource_cursor_clear_bulk(struct ttm_resource_cursor *cursor)
+{
+ cursor->bulk = NULL;
+ list_del_init(>bulk_link);
+}
+
+/* Move the cursor to the end of the bulk move list it's in */
+static void ttm_resource_cursor_move_bulk_tail(struct ttm_lru_bulk_move *bulk,
+ struct ttm_resource_cursor
*cursor)
+{
+ struct ttm_lru_bulk_move_pos *pos;
+
+ if (WARN_ON_ONCE(bulk != cursor->bulk)) {
+ list_del_init(>bulk_link);
+ return;
+ }
+
+ pos = >pos[cursor->man->mem_type][cursor->priority];
+ if (pos)
+ list_move(>hitch.link, >last->lru.link);
+ ttm_resource_cursor_clear_bulk(cursor);
+}
+
+/* Move all cursors attached to a bulk move to its end */
+static void ttm_bulk_move_adjust_cursors(struct ttm_lru_bulk_move *bulk)
+{
+ struct ttm_resource_cursor *cursor, *next;
+
+ list_for_each_entry_safe(cursor, next, >cursor_list, bulk_link)
+ ttm_resource_cursor_move_bulk_tail(bulk, cursor);
+}
+
+/* Remove a cursor from an empty bulk move list */
+static void ttm_bulk_move_drop_cursors(struct ttm_lru_bulk_move *bulk)
+{
+ struct ttm_resource_cursor *cursor, *next;
+
+ list_for_each_entry_safe(cursor, next, >cursor_list, bulk_link)
+ ttm_resource_cursor_clear_bulk(cursor);
+}
+
/**
* ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage
* @cursor: The struct ttm_resource_cursor to finalize.
@@ -44,6 +87,7 @@ void ttm_resource_cursor_fini_locked(struct
ttm_resource_cursor *cursor)
{
lockdep_assert_held(>man->bdev->lru_lock);
list_del_init(>hitch.link);
+ ttm_resource_cursor_clear_bulk(cursor);
}
/**
@@ -72,9 +116,27 @@ void ttm_resource_cursor_fini(struct ttm_resource_cursor
*cursor)
void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk)
{
memset(bulk, 0, sizeof(*bulk));
+ INIT_LIST_HEAD(>cursor_list);
}
EXPORT_SYMBOL(ttm_lru_bulk_move_init);
+/**
+ * ttm_lru_bulk_move_fini - finalize a bulk move structure
+ * @bdev: The struct ttm_device
+ * @bulk: the structure to finalize
+ *
+ * Sanity checks that bulk moves don't have any
+ * resources left and hence no cursors attached.
+ */
+void ttm_lru_bulk_move_fini(struct ttm_device *bdev,
+ struct ttm_lru_bulk_move *bulk)
+{
+ spin_lock(>lru_lock);
+ ttm_bulk_move_drop_cursors(bulk);
+ spin_unlock(>lru_lock);
+}
+EXPORT_SYMBOL(ttm_lru_bulk_move_fini);
+
/**
* ttm_lru_bulk_move_tail - bulk move range of resources to the LRU tail.