Fix a sysfs duplication error when reinitializing the device:

  sysfs: cannot create duplicate filename '.../ip_discovery'
  kobject_add_internal failed for ip_discovery with -EEXIST
  ...
  Failed to create device file mem_info_preempt_used (-17)

The failure is caused by stale sysfs entries not being removed during
device teardown, leading to -EEXIST when the driver is reprobed. In
particular:

- the amdgpu_discovery sysfs kobjects were not torn down early enough,
  and adev->discovery.ip_top remained non-NULL after cleanup
- the preempt manager sysfs attribute was removed only conditionally
  and not during the common hw fini path

Fix this by:
- calling amdgpu_discovery_sysfs_fini() and
  amdgpu_preempt_mgr_sysfs_fini() from amdgpu_device_fini_hw()
- making amdgpu_discovery_sysfs_fini() externally visible and clearing
  adev->discovery.ip_top, so that the later call from
  amdgpu_discovery_fini() returns early and becomes a no-op
- centralizing preempt sysfs removal into a helper and reusing it from
  both fini paths

This ensures sysfs state is fully cleaned up before reprobe and avoids
duplicate kobject/file creation.

Change-Id: Ib91bf9eac4a1901c05bdb17b20de3e4122323b34
Cc: Christian König <[email protected]>
Cc: Alex Deucher <[email protected]>
Signed-off-by: Geoffrey McRae <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c      |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c   |  5 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c | 14 ++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h         |  1 +
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5ff224163bab..ef5cc4997656 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4200,6 +4200,10 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 
        if (adev->mman.initialized)
                drain_workqueue(adev->mman.bdev.wq);
+
+       amdgpu_discovery_sysfs_fini(adev);
+       amdgpu_preempt_mgr_sysfs_fini(adev);
+
        adev->shutdown = true;
 
        unregister_pm_notifier(&adev->pm_nb);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 7c2212985273..a2ae26bb11ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -704,8 +704,6 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
        return r;
 }
 
-static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev);
-
 void amdgpu_discovery_fini(struct amdgpu_device *adev)
 {
        amdgpu_discovery_sysfs_fini(adev);
@@ -1391,7 +1389,7 @@ static void amdgpu_discovery_sysfs_die_free(struct ip_die_entry *ip_die_entry)
        kobject_put(&ip_die_entry->ip_kset.kobj);
 }
 
-static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
+void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
 {
        struct ip_discovery_top *ip_top = adev->discovery.ip_top;
        struct list_head *el, *tmp;
@@ -1400,6 +1398,7 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
        if (!ip_top)
                return;
 
+       adev->discovery.ip_top = NULL;
        die_kset = &ip_top->die_kset;
        spin_lock(&die_kset->list_lock);
        list_for_each_prev_safe(el, tmp, &die_kset->list) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index e0010f6a3eda..cff33ab2cb25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -41,6 +41,7 @@ struct amdgpu_discovery_info {
        bool reserve_tmr;
 };
 
+void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev);
 void amdgpu_discovery_fini(struct amdgpu_device *adev);
 int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
index 34b5e22b44e5..eab81206c050 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
@@ -46,6 +46,17 @@ static ssize_t mem_info_preempt_used_show(struct device *dev,
 
 static DEVICE_ATTR_RO(mem_info_preempt_used);
 
+/**
+ * amdgpu_preempt_mgr_sysfs_fini - remove PREEMPT manager sysfs attributes
+ *
+ * @adev: amdgpu_device pointer
+ */
+void amdgpu_preempt_mgr_sysfs_fini(struct amdgpu_device *adev)
+{
+       if (adev->dev->kobj.sd)
+               device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
+}
+
 /**
  * amdgpu_preempt_mgr_new - allocate a new node
  *
@@ -137,8 +148,7 @@ void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
        if (ret)
                return;
 
-       if (adev->dev->kobj.sd)
-               device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
+       amdgpu_preempt_mgr_sysfs_fini(adev);
 
        ttm_resource_manager_cleanup(man);
        ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 2d72fa217274..00acec7226f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -140,6 +140,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
 void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
 int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
 void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev);
+void amdgpu_preempt_mgr_sysfs_fini(struct amdgpu_device *adev);
 int amdgpu_vram_mgr_init(struct amdgpu_device *adev);
 void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
 
-- 
2.43.0

Reply via email to