Re: [PATCH v3 3/4] drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves

2024-03-05 Thread kernel test robot
Hi Thomas,

kernel test robot noticed the following build warnings:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on drm-intel/for-linux-next drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next-fixes linus/master v6.8-rc7 next-20240305]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-ttm-Allow-TTM-LRU-list-nodes-of-different-types/20240306-000453
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20240305160202.3555-4-thomas.hellstrom%40linux.intel.com
patch subject: [PATCH v3 3/4] drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves
config: arm-defconfig (https://download.01.org/0day-ci/archive/20240306/202403060640.zcxpvobi-...@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240306/202403060640.zcxpvobi-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot 
| Closes: https://lore.kernel.org/oe-kbuild-all/202403060640.zcxpvobi-...@intel.com/

All warnings (new ones prefixed by >>):

   In file included from drivers/gpu/drm/drm_gem_ttm_helper.c:5:
   In file included from include/drm/drm_gem_ttm_helper.h:10:
   In file included from include/drm/ttm/ttm_bo.h:39:
>> include/drm/ttm/ttm_device.h:286:31: warning: variable 'old' set but not used [-Wunused-but-set-variable]
           struct ttm_resource_manager *old;
                                        ^
   1 warning generated.


vim +/old +286 include/drm/ttm/ttm_device.h

   282  
   283  static inline void ttm_set_driver_manager(struct ttm_device *bdev, int type,
   284                                            struct ttm_resource_manager *manager)
   285  {
 > 286          struct ttm_resource_manager *old;
   287  
   288          BUILD_BUG_ON(__builtin_constant_p(type) && type >= TTM_NUM_MEM_TYPES);
   289          old = bdev->man_drv[type];
   290          bdev->man_drv[type] = manager;
   291          if (manager)
   292                  manager->mem_type = type;
   293  }
   294  
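
If the assignment to 'old' is indeed a leftover, one minimal fix would be to
drop the variable entirely (a sketch only; whether 'old' was meant to be used
for a later sanity check is for the author to decide):

static inline void ttm_set_driver_manager(struct ttm_device *bdev, int type,
					  struct ttm_resource_manager *manager)
{
	BUILD_BUG_ON(__builtin_constant_p(type) && type >= TTM_NUM_MEM_TYPES);
	/* Nothing reads the previous manager back, so don't remember it. */
	bdev->man_drv[type] = manager;
	if (manager)
		manager->mem_type = type;
}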

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


[PATCH v3 3/4] drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves

2024-03-05 Thread Thomas Hellström
To address the problem with hitches moving when bulk move
sublists are lru-bumped, register the list cursors with the
ttm_lru_bulk_move structure when traversing its list, and
when lru-bumping the list, move the cursor hitch to the tail.
This also means it's mandatory for drivers to call
ttm_lru_bulk_move_init() and ttm_lru_bulk_move_fini() when
initializing and finalizing the bulk move structure, so add
those calls to the amdgpu and xe drivers.
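
For illustration, a minimal traversal sketch (not part of the patch;
'bdev' and 'man' stand for the ttm_device and ttm_resource_manager at
hand, and the lock drop in the loop body is hypothetical):

	struct ttm_resource_cursor cursor;
	struct ttm_resource *res;

	spin_lock(&bdev->lru_lock);
	ttm_resource_manager_for_each_res(man, &cursor, res) {
		/*
		 * The lru_lock may be dropped here. If a concurrent bulk
		 * LRU bump moves the sublist being iterated, the cursor
		 * hitch is moved to the sublist tail, so the walk resumes
		 * after the bumped range rather than at a stale position.
		 */
	}
	ttm_resource_cursor_fini_locked(&cursor);
	spin_unlock(&bdev->lru_lock);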

Compared to v1 this is slightly more code but less fragile
and hopefully easier to understand.

v2:
- Completely rework the functionality
v3:
- Avoid a NULL pointer dereference assigning manager->mem_type

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  4 ++
 drivers/gpu/drm/ttm/ttm_resource.c     | 90 +-
 drivers/gpu/drm/xe/xe_vm.c             |  4 ++
 include/drm/ttm/ttm_device.h           |  5 ++
 include/drm/ttm/ttm_resource.h         | 55 ++--
 5 files changed, 137 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ed4a8c5d26d7..7c2ee5d12bc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2264,6 +2264,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r)
return r;
 
+   ttm_lru_bulk_move_init(&vm->lru_bulk_move);
+
vm->pte_support_ats = false;
vm->is_compute_context = false;
 
@@ -2324,6 +2326,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 error_free_delayed:
dma_fence_put(vm->last_tlb_flush);
dma_fence_put(vm->last_unlocked);
+   ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
amdgpu_vm_fini_entities(vm);
 
return r;
@@ -2497,6 +2500,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
}
 
+   ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
 }
 
 /**
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 971014fca10a..0acd4bf764b2 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -32,6 +32,49 @@
 
 #include 
 
+/* Detach the cursor from the bulk move list */
+static void
+ttm_resource_cursor_clear_bulk(struct ttm_resource_cursor *cursor)
+{
+   cursor->bulk = NULL;
+   list_del_init(&cursor->bulk_link);
+}
+
+/* Move the cursor to the end of the bulk move list it's in */
+static void ttm_resource_cursor_move_bulk_tail(struct ttm_lru_bulk_move *bulk,
+                                              struct ttm_resource_cursor *cursor)
+{
+   struct ttm_lru_bulk_move_pos *pos;
+
+   if (WARN_ON_ONCE(bulk != cursor->bulk)) {
+   list_del_init(&cursor->bulk_link);
+   return;
+   }
+
+   pos = &bulk->pos[cursor->man->mem_type][cursor->priority];
+   if (pos)
+   list_move(&cursor->hitch.link, &pos->last->lru.link);
+   ttm_resource_cursor_clear_bulk(cursor);
+}
+
+/* Move all cursors attached to a bulk move to its end */
+static void ttm_bulk_move_adjust_cursors(struct ttm_lru_bulk_move *bulk)
+{
+   struct ttm_resource_cursor *cursor, *next;
+
+   list_for_each_entry_safe(cursor, next, &bulk->cursor_list, bulk_link)
+   ttm_resource_cursor_move_bulk_tail(bulk, cursor);
+}
+
+/* Remove all cursors attached to an empty bulk move list */
+static void ttm_bulk_move_drop_cursors(struct ttm_lru_bulk_move *bulk)
+{
+   struct ttm_resource_cursor *cursor, *next;
+
+   list_for_each_entry_safe(cursor, next, &bulk->cursor_list, bulk_link)
+   ttm_resource_cursor_clear_bulk(cursor);
+}
+
 /**
  * ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage
  * @cursor: The struct ttm_resource_cursor to finalize.
@@ -44,6 +87,7 @@ void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor)
 {
lockdep_assert_held(&cursor->man->bdev->lru_lock);
list_del_init(&cursor->hitch.link);
+   ttm_resource_cursor_clear_bulk(cursor);
 }
 
 /**
@@ -72,9 +116,27 @@ void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor)
 void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk)
 {
memset(bulk, 0, sizeof(*bulk));
+   INIT_LIST_HEAD(&bulk->cursor_list);
 }
 EXPORT_SYMBOL(ttm_lru_bulk_move_init);
 
+/**
+ * ttm_lru_bulk_move_fini - finalize a bulk move structure
+ * @bdev: The struct ttm_device
+ * @bulk: the structure to finalize
+ *
+ * Sanity-checks that the bulk move doesn't have any
+ * resources left and hence no cursors attached.
+ */
+void ttm_lru_bulk_move_fini(struct ttm_device *bdev,
+   struct ttm_lru_bulk_move *bulk)
+{
+   spin_lock(&bdev->lru_lock);
+   ttm_bulk_move_drop_cursors(bulk);
+   spin_unlock(&bdev->lru_lock);
+}
+EXPORT_SYMBOL(ttm_lru_bulk_move_fini);
+
 /**
  * ttm_lru_bulk_move_tail - bulk move range of resources to the LRU tail.