On 8/2/2025 12:10 AM, Felix Kuehling wrote: > On 2025-08-01 4:55, Zhu Lingshan wrote: >> This commit decommissions the function kfd_get_process() >> because it cannot locate a specific kfd process among >> multiple contexts. >> >> This commit refactors the relevant code path accordingly: >> - kfd_mmap: retrieve the kfd_process from filep->private_data >> - kq_initialize: queue->process for HIQ should be set to NULL >> because it does not belong to any kfd_process. DIQ has been >> decommissioned in this commit because it has been marked as >> DEPRECATED since 2022 in commit 5bdd3eb2 >> >> This commit removes the test_kq() function because it has been >> marked as unused since 2014 and no other function calls it. > Please split this into 3 commits: > > 1. Change how kfd_mmap looks up the process > 2. Remove DIQ support > 3. Remove test_kq
Will do! > See one more comment inline. > > >> Signed-off-by: Zhu Lingshan <lingshan....@amd.com> >> --- >> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 11 ++-- >> .../drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +- >> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 60 ++----------------- >> .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 4 -- >> .../drm/amd/amdkfd/kfd_packet_manager_vi.c | 4 -- >> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 - >> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 18 ------ >> .../amd/amdkfd/kfd_process_queue_manager.c | 35 +---------- >> 8 files changed, 16 insertions(+), 123 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c >> index 5b22e1c47b2e..9e95acd23889 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c >> @@ -3408,16 +3408,19 @@ static int kfd_mmio_mmap(struct kfd_node *dev, >> struct kfd_process *process, >> } >> >> >> -static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) >> +static int kfd_mmap(struct file *filep, struct vm_area_struct *vma) >> { >> struct kfd_process *process; >> struct kfd_node *dev = NULL; >> unsigned long mmap_offset; >> unsigned int gpu_id; >> >> - process = kfd_get_process(current); >> - if (IS_ERR(process)) >> - return PTR_ERR(process); >> + process = filep->private_data; >> + if (!process) >> + return -ESRCH; >> + >> + if (process->lead_thread != current->group_leader) >> + return -EBADF; >> >> mmap_offset = vma->vm_pgoff << PAGE_SHIFT; >> gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset); >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c >> index 76359c6a3f3a..3f78e0bb2dae 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c >> @@ -399,8 +399,7 @@ static void increment_queue_count(struct >> device_queue_manager *dqm, >> 
struct queue *q) >> { >> dqm->active_queue_count++; >> - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || >> - q->properties.type == KFD_QUEUE_TYPE_DIQ) >> + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) >> dqm->active_cp_queue_count++; >> >> if (q->properties.is_gws) { >> @@ -414,8 +413,7 @@ static void decrement_queue_count(struct >> device_queue_manager *dqm, >> struct queue *q) >> { >> dqm->active_queue_count--; >> - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || >> - q->properties.type == KFD_QUEUE_TYPE_DIQ) >> + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) >> dqm->active_cp_queue_count--; >> >> if (q->properties.is_gws) { >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >> index 2b0a830f5b29..6aa8b0348bad 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >> @@ -46,7 +46,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct >> kfd_node *dev, >> int retval; >> union PM4_MES_TYPE_3_HEADER nop; >> >> - if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ)) >> + if (WARN_ON(type != KFD_QUEUE_TYPE_HIQ)) >> return false; >> >> pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ, >> @@ -61,14 +61,9 @@ static bool kq_initialize(struct kernel_queue *kq, struct >> kfd_node *dev, >> >> kq->dev = dev; >> kq->nop_packet = nop.u32all; >> - switch (type) { >> - case KFD_QUEUE_TYPE_DIQ: >> - kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ]; >> - break; >> - case KFD_QUEUE_TYPE_HIQ: >> + if (type == KFD_QUEUE_TYPE_HIQ) >> kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; >> - break; >> - default: >> + else { >> dev_err(dev->adev->dev, "Invalid queue type %d\n", type); >> return false; >> } >> @@ -144,7 +139,8 @@ static bool kq_initialize(struct kernel_queue *kq, >> struct kfd_node *dev, >> goto err_init_queue; >> >> kq->queue->device = dev; >> - kq->queue->process = 
kfd_get_process(current); >> + if (type == KFD_QUEUE_TYPE_HIQ) >> + kq->queue->process = NULL; >> >> kq->queue->mqd_mem_obj = kq->mqd_mgr->allocate_mqd(kq->mqd_mgr->dev, >> &kq->queue->properties); >> @@ -162,24 +158,11 @@ static bool kq_initialize(struct kernel_queue *kq, >> struct kfd_node *dev, >> kq->mqd_mgr->load_mqd(kq->mqd_mgr, kq->queue->mqd, >> kq->queue->pipe, kq->queue->queue, >> &kq->queue->properties, NULL); >> - } else { >> - /* allocate fence for DIQ */ >> - >> - retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t), >> - &kq->fence_mem_obj); >> - >> - if (retval != 0) >> - goto err_alloc_fence; >> - >> - kq->fence_kernel_address = kq->fence_mem_obj->cpu_ptr; >> - kq->fence_gpu_addr = kq->fence_mem_obj->gpu_addr; >> } >> >> print_queue(kq->queue); >> >> return true; >> -err_alloc_fence: >> - kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd, >> kq->queue->mqd_mem_obj); >> err_allocate_mqd: >> uninit_queue(kq->queue); >> err_init_queue: >> @@ -209,8 +192,6 @@ static void kq_uninitialize(struct kernel_queue *kq) >> kq->queue->queue); >> up_read(&kq->dev->adev->reset_domain->sem); >> } >> - else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ) >> - kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj); >> >> kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd, >> kq->queue->mqd_mem_obj); >> @@ -358,34 +339,3 @@ void kernel_queue_uninit(struct kernel_queue *kq) >> kq_uninitialize(kq); >> kfree(kq); >> } >> - >> -/* FIXME: Can this test be removed? 
*/ >> -static __attribute__((unused)) void test_kq(struct kfd_node *dev) >> -{ >> - struct kernel_queue *kq; >> - uint32_t *buffer, i; >> - int retval; >> - >> - dev_err(dev->adev->dev, "Starting kernel queue test\n"); >> - >> - kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ); >> - if (unlikely(!kq)) { >> - dev_err(dev->adev->dev, " Failed to initialize HIQ\n"); >> - dev_err(dev->adev->dev, "Kernel queue test failed\n"); >> - return; >> - } >> - >> - retval = kq_acquire_packet_buffer(kq, 5, &buffer); >> - if (unlikely(retval != 0)) { >> - dev_err(dev->adev->dev, " Failed to acquire packet buffer\n"); >> - dev_err(dev->adev->dev, "Kernel queue test failed\n"); >> - return; >> - } >> - for (i = 0; i < 5; i++) >> - buffer[i] = kq->nop_packet; >> - kq_submit_packet(kq); >> - >> - dev_err(dev->adev->dev, "Ending kernel queue test\n"); >> -} >> - >> - >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c >> index 505036968a77..3d2375817c3e 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c >> @@ -252,10 +252,6 @@ static int pm_map_queues_v9(struct packet_manager *pm, >> uint32_t *buffer, >> packet->bitfields2.queue_type = >> queue_type__mes_map_queues__normal_latency_static_queue_vi; >> break; >> - case KFD_QUEUE_TYPE_DIQ: >> - packet->bitfields2.queue_type = >> - queue_type__mes_map_queues__debug_interface_queue_vi; >> - break; >> case KFD_QUEUE_TYPE_SDMA: >> case KFD_QUEUE_TYPE_SDMA_XGMI: >> if (q->properties.sdma_engine_id < 2 && >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c >> index a1de5d7e173a..60086e7cc258 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c >> @@ -166,10 +166,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, >> uint32_t *buffer, >> 
packet->bitfields2.queue_type = >> queue_type__mes_map_queues__normal_latency_static_queue_vi; >> break; >> - case KFD_QUEUE_TYPE_DIQ: >> - packet->bitfields2.queue_type = >> - queue_type__mes_map_queues__debug_interface_queue_vi; >> - break; >> case KFD_QUEUE_TYPE_SDMA: >> case KFD_QUEUE_TYPE_SDMA_XGMI: >> packet->bitfields2.engine_sel = q->properties.sdma_engine_id + >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> index a6e12c705734..67ebdaa9995f 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> @@ -1047,7 +1047,6 @@ void kfd_process_destroy_wq(void); >> void kfd_cleanup_processes(void); >> struct kfd_process *kfd_create_process(struct task_struct *thread); >> int kfd_create_process_sysfs(struct kfd_process *process); >> -struct kfd_process *kfd_get_process(const struct task_struct *task); >> struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid, >> struct kfd_process_device >> **pdd); >> struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c >> index 117e524f4fb3..2d01356627ef 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c >> @@ -989,24 +989,6 @@ struct kfd_process *kfd_create_process(struct >> task_struct *thread) >> return process; >> } >> >> -struct kfd_process *kfd_get_process(const struct task_struct *thread) >> -{ >> - struct kfd_process *process; >> - >> - if (!thread->mm) >> - return ERR_PTR(-EINVAL); >> - >> - /* Only the pthreads threading model is supported. 
*/ >> - if (thread->group_leader->mm != thread->mm) >> - return ERR_PTR(-EINVAL); >> - >> - process = find_process(thread, false); >> - if (!process) >> - return ERR_PTR(-EINVAL); >> - >> - return process; >> -} >> - >> static struct kfd_process *find_process_by_mm(const struct mm_struct *mm) >> { >> struct kfd_process *process; >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c >> index c643e0ccec52..287ac5de838a 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c >> @@ -345,7 +345,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, >> * If we are just about to create DIQ, the is_debug flag is not set yet >> * Hence we also check the type as well >> */ >> - if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) >> + if ((pdd->qpd.is_debug)) >> max_queues = dev->kfd->device_info.max_no_of_hqd/2; >> >> if (pdd->qpd.queue_count >= max_queues) >> @@ -426,22 +426,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, >> restore_mqd, >> restore_ctl_stack); >> print_queue(q); >> break; >> - case KFD_QUEUE_TYPE_DIQ: >> - kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ); >> - if (!kq) { >> - retval = -ENOMEM; >> - goto err_create_queue; >> - } >> - kq->queue->properties.queue_id = *qid; >> - pqn->kq = kq; >> - pqn->q = NULL; >> - retval = kfd_process_drain_interrupts(pdd); >> - if (retval) >> - break; >> - >> - retval = dev->dqm->ops.create_kernel_queue(dev->dqm, >> - kq, &pdd->qpd); >> - break; >> default: >> WARN(1, "Invalid queue type %d", type); >> retval = -EINVAL; >> @@ -1128,24 +1112,9 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) >> mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type]; >> size = mqd_mgr->mqd_stride(mqd_mgr, >> &q->properties); >> - } else if (pqn->kq) { >> - q = pqn->kq->queue; >> - mqd_mgr = pqn->kq->mqd_mgr; >> - switch (q->properties.type) { >> - case 
KFD_QUEUE_TYPE_DIQ: >> - seq_printf(m, " DIQ on device %x\n", >> - pqn->kq->dev->id); >> - break; >> - default: >> - seq_printf(m, >> - " Bad kernel queue type %d on device %x\n", >> - q->properties.type, >> - pqn->kq->dev->id); >> - continue; >> - } >> } else { >> seq_printf(m, >> - " Weird: Queue node with neither kernel nor user queue\n"); >> + " Weird: Queue node with neither kernel nor user >> queue\n"); > This message is no longer accurate, since this function no longer handles > kernel queues at all. I will improve this message so that it no longer mentions kernel queues. Thanks Lingshan > > Regards, > Felix > > >> continue; >> } >>