Am 07.08.19 um 01:15 schrieb Jason Gunthorpe:
From: Jason Gunthorpe
radeon is using a device global hash table to track what mmu_notifiers
have been registered on struct mm. This is better served with the new
get/put scheme instead.
radeon has a bug where it was not blocking notifier release() until all
the BO's had been invalidated. This could result in a use after free of
pages the BOs. This is tied into a second bug where radeon left the
notifiers running endlessly even once the interval tree became
empty. This could result in a use after free with module unload.
Both are fixed by changing the lifetime model, the BOs exist in the
interval tree with their natural lifetimes independent of the mm_struct
lifetime using the get/put scheme. The release runs synchronously and just
does invalidate_start across the entire interval tree to create the
required DMA fence.
Additions to the interval tree after release are already impossible as
only current->mm is used during the add.
Signed-off-by: Jason Gunthorpe
Acked-by: Christian König
But I'm wondering if we shouldn't completely drop radeon userptr support.
It's just to buggy,
Christian.
---
drivers/gpu/drm/radeon/radeon.h| 3 -
drivers/gpu/drm/radeon/radeon_device.c | 2 -
drivers/gpu/drm/radeon/radeon_drv.c| 2 +
drivers/gpu/drm/radeon/radeon_mn.c | 157 ++---
4 files changed, 38 insertions(+), 126 deletions(-)
AMD team: I wonder if kfd has similar lifetime issues?
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 32808e50be12f8..918164f90b114a 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2451,9 +2451,6 @@ struct radeon_device {
/* tracking pinned memory */
u64 vram_pin_size;
u64 gart_pin_size;
-
- struct mutexmn_lock;
- DECLARE_HASHTABLE(mn_hash, 7);
};
bool radeon_is_px(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c
b/drivers/gpu/drm/radeon/radeon_device.c
index dceb554e567446..788b1d8a80e660 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1325,8 +1325,6 @@ int radeon_device_init(struct radeon_device *rdev,
init_rwsem(&rdev->pm.mclk_lock);
init_rwsem(&rdev->exclusive_lock);
init_waitqueue_head(&rdev->irq.vblank_queue);
- mutex_init(&rdev->mn_lock);
- hash_init(rdev->mn_hash);
r = radeon_gem_init(rdev);
if (r)
return r;
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c
b/drivers/gpu/drm/radeon/radeon_drv.c
index a6cbe11f79c611..b6535ac91fdb74 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -35,6 +35,7 @@
#include
#include
#include
+#include
#include
#include
@@ -624,6 +625,7 @@ static void __exit radeon_exit(void)
{
pci_unregister_driver(pdriver);
radeon_unregister_atpx_handler();
+ mmu_notifier_synchronize();
}
module_init(radeon_init);
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c
b/drivers/gpu/drm/radeon/radeon_mn.c
index 8c3871ed23a9f0..fc8254273a800b 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -37,17 +37,8 @@
#include "radeon.h"
struct radeon_mn {
- /* constant after initialisation */
- struct radeon_device*rdev;
- struct mm_struct*mm;
struct mmu_notifier mn;
- /* only used on destruction */
- struct work_struct work;
-
- /* protected by rdev->mn_lock */
- struct hlist_node node;
-
/* objects protected by lock */
struct mutexlock;
struct rb_root_cached objects;
@@ -58,55 +49,6 @@ struct radeon_mn_node {
struct list_headbos;
};
-/**
- * radeon_mn_destroy - destroy the rmn
- *
- * @work: previously sheduled work item
- *
- * Lazy destroys the notifier from a work item
- */
-static void radeon_mn_destroy(struct work_struct *work)
-{
- struct radeon_mn *rmn = container_of(work, struct radeon_mn, work);
- struct radeon_device *rdev = rmn->rdev;
- struct radeon_mn_node *node, *next_node;
- struct radeon_bo *bo, *next_bo;
-
- mutex_lock(&rdev->mn_lock);
- mutex_lock(&rmn->lock);
- hash_del(&rmn->node);
- rbtree_postorder_for_each_entry_safe(node, next_node,
-&rmn->objects.rb_root, it.rb) {
-
- interval_tree_remove(&node->it, &rmn->objects);
- list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
- bo->mn = NULL;
- list_del_init(&bo->mn_list);
- }
- kfree(node);
- }
- mutex_unlock(&rmn->lock);
- mutex_unlock(&rdev->mn_lock);
- mmu_notifier_unregister(&rmn->mn, rmn->mm);
- kfree(rmn);
-}
-
-/**
- * radeon_mn