Fix a sysfs duplication error when reinitializing the device:

  sysfs: cannot create duplicate filename '.../ip_discovery'
  kobject_add_internal failed for ip_discovery with -EEXIST
  ...
  Failed to create device file mem_info_preempt_used (-17)

The failure is caused by stale sysfs entries not being removed during device teardown, leading to -EEXIST when the driver is reprobed. In particular:

- the amdgpu_discovery sysfs kobjects were not fully torn down early enough, and adev->discovery.ip_top remained non-NULL after cleanup
- the preempt manager sysfs attribute was removed only conditionally and not during the common hw fini path

Fix this by:

- calling amdgpu_discovery_sysfs_fini() and amdgpu_preempt_mgr_sysfs_fini() from amdgpu_device_fini_hw()
- making amdgpu_discovery_sysfs_fini() externally visible and clearing adev->discovery.ip_top to prevent reuse
- centralizing preempt sysfs removal into a helper and reusing it from both fini paths

This ensures sysfs state is fully cleaned up before reprobe and avoids duplicate kobject/file creation.

Change-Id: Ib91bf9eac4a1901c05bdb17b20de3e4122323b34
Cc: Christian König <[email protected]>
Cc: Alex Deucher <[email protected]>
Signed-off-by: Geoffrey McRae <[email protected]>
--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 5 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c | 14 ++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 + 5 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5ff224163bab..ef5cc4997656 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4200,6 +4200,10 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) if (adev->mman.initialized) drain_workqueue(adev->mman.bdev.wq); + + amdgpu_discovery_sysfs_fini(adev); + amdgpu_preempt_mgr_sysfs_fini(adev); + adev->shutdown = true; unregister_pm_notifier(&adev->pm_nb); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 7c2212985273..a2ae26bb11ff 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -704,8 +704,6 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) return r; } -static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev); - void amdgpu_discovery_fini(struct amdgpu_device *adev) { amdgpu_discovery_sysfs_fini(adev); @@ -1391,7 +1389,7 @@ static void amdgpu_discovery_sysfs_die_free(struct ip_die_entry *ip_die_entry) kobject_put(&ip_die_entry->ip_kset.kobj); } -static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev) +void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev) { struct ip_discovery_top *ip_top = adev->discovery.ip_top; struct list_head *el, *tmp; @@ -1400,6 +1398,7 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev) if (!ip_top) return; + adev->discovery.ip_top = NULL; die_kset = &ip_top->die_kset; spin_lock(&die_kset->list_lock); list_for_each_prev_safe(el, tmp, &die_kset->list) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index e0010f6a3eda..cff33ab2cb25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -41,6 +41,7 @@ struct amdgpu_discovery_info { bool reserve_tmr; }; +void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev); void amdgpu_discovery_fini(struct amdgpu_device *adev); int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c index 34b5e22b44e5..eab81206c050 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c @@ -46,6 +46,17 @@ static ssize_t mem_info_preempt_used_show(struct device *dev, static DEVICE_ATTR_RO(mem_info_preempt_used); +/** + * amdgpu_preempt_mgr_sysfs_fini - remove PREEMPT manager sysfs attributes + * + * @adev: amdgpu_device pointer + */ +void 
amdgpu_preempt_mgr_sysfs_fini(struct amdgpu_device *adev) +{ + if (adev->dev->kobj.sd) + device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used); +} + /** * amdgpu_preempt_mgr_new - allocate a new node * @@ -137,8 +148,7 @@ void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev) if (ret) return; - if (adev->dev->kobj.sd) - device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used); + amdgpu_preempt_mgr_sysfs_fini(adev); ttm_resource_manager_cleanup(man); ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 2d72fa217274..00acec7226f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -140,6 +140,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size); void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev); int amdgpu_preempt_mgr_init(struct amdgpu_device *adev); void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev); +void amdgpu_preempt_mgr_sysfs_fini(struct amdgpu_device *adev); int amdgpu_vram_mgr_init(struct amdgpu_device *adev); void amdgpu_vram_mgr_fini(struct amdgpu_device *adev); -- 2.43.0
