On 5/7/26 17:05, Alex Deucher wrote:
> On Thu, May 7, 2026 at 10:54 AM Christian König
> <[email protected]> wrote:
>>
>> On 5/7/26 16:09, Yifan Zhang wrote:
>>> During Mode 1 reset, the ASIC undergoes a reset cycle and becomes temporarily
>>> inaccessible via PCIe. Any attempt to access framebuffer or MMIO registers during
>>> this window can result in uncompleted PCIe transactions, leading to NMI panics or
>>> system hangs.
>>>
>>> To prevent this, unmap all of the applications' mappings of the framebuffer
>>> and doorbell BARs before mode1 reset. Also prevent new mappings from coming in
>>> during the reset process.
>>
>> I think putting the inode into the kfd_dev would break userspace, so that is 
>> a no-go and needs a different solution.
>>
>> But apart from that the patch looks like what I suggested before.
>>
>> Question is since when does that issue exist? Previously we didn't have to 
>> take care of that.
> 
> It's always existed, I guess generally there are not often CPU
> accesses to VRAM during a reset.  It depends on the platform and what
> level of PCIe error handling it enables.

Yeah I feared that. Yesterday I've gone through that step by step and came to 
the conclusion that our approach here won't work at all.

The problem is that vmf_insert_pfn_prot() can cycle back and wait for the GPU 
reset to complete.

So when we are now holding adev->reset_domain->sem to prevent new mapping while 
calling vmf_insert_pfn_prot() we can run into a deadlock.

I don't see how we can solve this except for switching TTM's fault handling over 
to apply_to_page_range() and then holding the lock while calling set_pte().

I tried to do that a while back because it would also improve TTM's mapping 
performance quite a bit, but that was rejected from upstream because of 
restrictions around apply_to_page_range().

Anyway this can only be done with a *huge* change to TTM and KFD. I fear we 
will need multiple months for that.

Regards,
Christian.

> 
> Alex
> 
>>
>> Regards,
>> Christian.
>>
>>>
>>> Signed-off-by: Yifan Zhang <[email protected]>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h      |  1 +
>>>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c    | 12 ++++++++++++
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c      |  7 ++++++-
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c         | 17 +++++++++++++++--
>>>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c        | 10 ++++++++++
>>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h           |  3 +++
>>>  6 files changed, 47 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> index 2bf6a31c194d..5333e052d56d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> @@ -360,6 +360,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct 
>>> amdgpu_device *adev,
>>>               uint64_t size, u32 alloc_flag, int8_t xcp_id);
>>>  void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
>>>               uint64_t size, u32 alloc_flag, int8_t xcp_id);
>>> +void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev);
>>>
>>>  u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index 7c01492e69dd..3ac2bd86c08b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -139,6 +139,18 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
>>>       kfd_mem_limit.system_mem_used += size;
>>>  }
>>>
>>> +void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev)
>>> +{
>>> +     if (adev->kfd.dev && adev->kfd.dev->inode &&
>>> +             adev->kfd.dev->inode->i_mapping) {
>>> +             unmap_mapping_range(adev->kfd.dev->inode->i_mapping,
>>> +                     KFD_MMAP_TYPE_DOORBELL, 
>>> kfd_doorbell_process_slice(adev->kfd.dev), 1);
>>> +             unmap_mapping_range(adev->kfd.dev->inode->i_mapping,
>>> +                     KFD_MMAP_TYPE_MMIO, PAGE_SIZE, 1);
>>> +     }
>>> +}
>>> +
>>> +
>>>  /* Estimate page table size needed to represent a given memory size
>>>   *
>>>   * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 073f632f295a..c67936d1fb0d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -5019,7 +5019,6 @@ int amdgpu_device_mode1_reset(struct amdgpu_device 
>>> *adev)
>>>
>>>       /* disable BM */
>>>       pci_clear_master(adev->pdev);
>>> -
>>>       if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
>>>               dev_info(adev->dev, "GPU smu mode1 reset\n");
>>>               ret = amdgpu_dpm_mode1_reset(adev);
>>> @@ -5840,6 +5839,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
>>> *adev,
>>>       /* We need to lock reset domain only once both for XGMI and single 
>>> device */
>>>       amdgpu_device_recovery_get_reset_lock(adev, &device_list);
>>>
>>> +     /* unmap all the mappings of doorbell and framebuffer to prevent user 
>>> space from
>>> +      * accessing them
>>> +      */
>>> +     unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
>>> +     amdgpu_amdkfd_clear_kfd_mapping(adev);
>>> +
>>>       amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
>>>                                     hive, need_emergency_restart);
>>>       if (need_emergency_restart)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index 0071d6957828..1dd343f0219f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -46,6 +46,7 @@
>>>  #include "amdgpu_hmm.h"
>>>  #include "amdgpu_xgmi.h"
>>>  #include "amdgpu_vm.h"
>>> +#include "amdgpu_reset.h"
>>>
>>>  static int
>>>  amdgpu_gem_add_input_fence(struct drm_file *filp,
>>> @@ -118,13 +119,21 @@ amdgpu_gem_update_timeline_node(struct drm_file *filp,
>>>  static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
>>>  {
>>>       struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
>>> +     struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>>>       struct drm_device *ddev = bo->base.dev;
>>>       vm_fault_t ret;
>>>       int idx;
>>>
>>> +     /* Prevent new mappings from coming in during reset */
>>> +
>>> +     if (!down_read_trylock(&adev->reset_domain->sem))
>>> +             return VM_FAULT_SIGSEGV;
>>> +
>>>       ret = ttm_bo_vm_reserve(bo, vmf);
>>> -     if (ret)
>>> +     if (ret) {
>>> +             up_read(&adev->reset_domain->sem);
>>>               return ret;
>>> +     }
>>>
>>>       if (drm_dev_enter(ddev, &idx)) {
>>>               ret = amdgpu_bo_fault_reserve_notify(bo);
>>> @@ -140,11 +149,15 @@ static vm_fault_t amdgpu_gem_fault(struct vm_fault 
>>> *vmf)
>>>       } else {
>>>               ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
>>>       }
>>> -     if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
>>> +     if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) 
>>> {
>>> +             up_read(&adev->reset_domain->sem);
>>>               return ret;
>>> +     }
>>>
>>>  unlock:
>>>       dma_resv_unlock(bo->base.resv);
>>> +     up_read(&adev->reset_domain->sem);
>>> +
>>>       return ret;
>>>  }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index 84b9bde7f371..d5bf5ba73ca0 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -130,11 +130,21 @@ void kfd_chardev_exit(void)
>>>  static int kfd_open(struct inode *inode, struct file *filep)
>>>  {
>>>       struct kfd_process *process;
>>> +     struct kfd_node *node;
>>>       bool is_32bit_user_mode;
>>> +     int i;
>>>
>>>       if (iminor(inode) != 0)
>>>               return -ENODEV;
>>>
>>> +     /* Save inode in kfd_dev for unmap_mapping_range */
>>> +     for (i = 0; kfd_topology_enum_kfd_devices(i, &node) == 0; i++) {
>>> +             if (node && node->kfd && !node->kfd->inode) {
>>> +                     node->kfd->inode = inode;
>>> +                     break;
>>> +             }
>>> +     }
>>> +
>>>       is_32bit_user_mode = in_compat_syscall();
>>>
>>>       if (is_32bit_user_mode) {
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> index a6ff1db477f9..bfdd7d3bc7cb 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -373,6 +373,9 @@ struct kfd_dev {
>>>
>>>       struct workqueue_struct *ih_wq;
>>>
>>> +     /* kfd inode */
>>> +     struct inode *inode;
>>> +
>>>       /* Kernel doorbells for KFD device */
>>>       struct amdgpu_bo *doorbells;
>>>
>>

Reply via email to