[PATCH 81/83] hsa/radeon: Eliminating all direct register accesses
From: Ben Goz This patch eliminates all direct register accesses from KFD and eliminates the use of shared locks between KFD and radeon. The single exception is the doorbells that are used in both of the drivers. However, because they are located in separate PCI BAR pages, the danger of sharing registers between the drivers is minimal. Having said that, we are planning to move the doorbells as well to radeon. Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay --- drivers/gpu/hsa/radeon/Makefile | 2 +- drivers/gpu/hsa/radeon/kfd_device.c | 2 - drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 113 +++--- drivers/gpu/hsa/radeon/kfd_kernel_queue.c | 12 +- drivers/gpu/hsa/radeon/kfd_mqd_manager.c | 175 +- drivers/gpu/hsa/radeon/kfd_mqd_manager.h | 37 +++-- drivers/gpu/hsa/radeon/kfd_priv.h | 18 --- drivers/gpu/hsa/radeon/kfd_registers.c| 50 --- 8 files changed, 54 insertions(+), 355 deletions(-) delete mode 100644 drivers/gpu/hsa/radeon/kfd_registers.c diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile index b5f05b4..d838bce 100644 --- a/drivers/gpu/hsa/radeon/Makefile +++ b/drivers/gpu/hsa/radeon/Makefile @@ -4,7 +4,7 @@ radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \ kfd_pasid.o kfd_topology.o kfd_process.o \ - kfd_doorbell.o kfd_registers.o kfd_vidmem.o \ + kfd_doorbell.o kfd_vidmem.o \ kfd_interrupt.o kfd_aperture.o kfd_queue.o kfd_mqd_manager.o \ kfd_kernel_queue.o kfd_packet_manager.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o diff --git a/drivers/gpu/hsa/radeon/kfd_device.c b/drivers/gpu/hsa/radeon/kfd_device.c index 30558c9..0ff2241 100644 --- a/drivers/gpu/hsa/radeon/kfd_device.c +++ b/drivers/gpu/hsa/radeon/kfd_device.c @@ -157,8 +157,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, { kfd->shared_resources = *gpu_resources; - kfd->regs = gpu_resources->mmio_registers; - radeon_kfd_doorbell_init(kfd); if (radeon_kfd_interrupt_init(kfd)) diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c 
b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c index 12b8b33..3eb5db3 100644 --- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c +++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c @@ -112,30 +112,15 @@ static void init_process_memory(struct device_queue_manager *dqm, struct qcm_pro static void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { - struct mqd_manager *mqd; - - BUG_ON(qpd->vmid < KFD_VMID_START_OFFSET); - - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); - if (mqd == NULL) - return; - - mqd->acquire_hqd(mqd, 0, 0, qpd->vmid); - - WRITE_REG(dqm->dev, SH_MEM_CONFIG, qpd->sh_mem_config); - - WRITE_REG(dqm->dev, SH_MEM_APE1_BASE, qpd->sh_mem_ape1_base); - WRITE_REG(dqm->dev, SH_MEM_APE1_LIMIT, qpd->sh_mem_ape1_limit); - WRITE_REG(dqm->dev, SH_MEM_BASES, qpd->sh_mem_bases); - - mqd->release_hqd(mqd); + return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid, qpd->sh_mem_config, + qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, qpd->sh_mem_bases); } static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd, int *allocate_vmid) { bool set, is_new_vmid; - int bit, retval, pipe; + int bit, retval, pipe, i; struct mqd_manager *mqd; BUG_ON(!dqm || !q || !qpd || !allocate_vmid); @@ -171,8 +156,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue * q->properties.vmid = qpd->vmid; set = false; - for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm); - pipe = (pipe + 1) % get_pipes_num(dqm)) { + for (i = 0, pipe = dqm->next_pipe_to_allocate; i < get_pipes_num(dqm); + pipe = (pipe + i++) % get_pipes_num(dqm)) { if (dqm->allocated_queues[pipe] != 0) { bit = find_first_bit((unsigned long *)&dqm->allocated_queues[pipe], QUEUES_PER_PIPE); clear_bit(bit, (unsigned long *)&dqm->allocated_queues[pipe]); @@ -238,9 +223,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, struct qcm_pr retval = 
-ENOMEM; goto out; } - mqd->acquire_hqd(mqd, q->pipe, q->queue, 0); - retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); - mqd->release_hqd(mqd); + retval = mqd->destroy_mqd(mqd, false, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, q->pipe, q->queue); if (retval != 0) goto out; @@
[PATCH 81/83] hsa/radeon: Eliminating all direct register accesses
From: Ben Goz <ben@amd.com> This patch eliminates all direct register accesses from KFD and eliminates the use of shared locks between KFD and radeon. The single exception is the doorbells that are used in both of the drivers. However, because they are located in separate PCI BAR pages, the danger of sharing registers between the drivers is minimal. Having said that, we are planning to move the doorbells as well to radeon. Signed-off-by: Ben Goz <ben@amd.com> Signed-off-by: Oded Gabbay <oded.gab...@amd.com> --- drivers/gpu/hsa/radeon/Makefile | 2 +- drivers/gpu/hsa/radeon/kfd_device.c | 2 - drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 113 +++--- drivers/gpu/hsa/radeon/kfd_kernel_queue.c | 12 +- drivers/gpu/hsa/radeon/kfd_mqd_manager.c | 175 +- drivers/gpu/hsa/radeon/kfd_mqd_manager.h | 37 +++-- drivers/gpu/hsa/radeon/kfd_priv.h | 18 --- drivers/gpu/hsa/radeon/kfd_registers.c| 50 --- 8 files changed, 54 insertions(+), 355 deletions(-) delete mode 100644 drivers/gpu/hsa/radeon/kfd_registers.c diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile index b5f05b4..d838bce 100644 --- a/drivers/gpu/hsa/radeon/Makefile +++ b/drivers/gpu/hsa/radeon/Makefile @@ -4,7 +4,7 @@ radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \ kfd_pasid.o kfd_topology.o kfd_process.o \ - kfd_doorbell.o kfd_registers.o kfd_vidmem.o \ + kfd_doorbell.o kfd_vidmem.o \ kfd_interrupt.o kfd_aperture.o kfd_queue.o kfd_mqd_manager.o \ kfd_kernel_queue.o kfd_packet_manager.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o diff --git a/drivers/gpu/hsa/radeon/kfd_device.c b/drivers/gpu/hsa/radeon/kfd_device.c index 30558c9..0ff2241 100644 --- a/drivers/gpu/hsa/radeon/kfd_device.c +++ b/drivers/gpu/hsa/radeon/kfd_device.c @@ -157,8 +157,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, { kfd->shared_resources = *gpu_resources; - kfd->regs = gpu_resources->mmio_registers; - radeon_kfd_doorbell_init(kfd); if (radeon_kfd_interrupt_init(kfd)) diff --git 
a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c index 12b8b33..3eb5db3 100644 --- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c +++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c @@ -112,30 +112,15 @@ static void init_process_memory(struct device_queue_manager *dqm, struct qcm_pro static void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { - struct mqd_manager *mqd; - - BUG_ON(qpd->vmid < KFD_VMID_START_OFFSET); - - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); - if (mqd == NULL) - return; - - mqd->acquire_hqd(mqd, 0, 0, qpd->vmid); - - WRITE_REG(dqm->dev, SH_MEM_CONFIG, qpd->sh_mem_config); - - WRITE_REG(dqm->dev, SH_MEM_APE1_BASE, qpd->sh_mem_ape1_base); - WRITE_REG(dqm->dev, SH_MEM_APE1_LIMIT, qpd->sh_mem_ape1_limit); - WRITE_REG(dqm->dev, SH_MEM_BASES, qpd->sh_mem_bases); - - mqd->release_hqd(mqd); + return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid, qpd->sh_mem_config, + qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, qpd->sh_mem_bases); } static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd, int *allocate_vmid) { bool set, is_new_vmid; - int bit, retval, pipe; + int bit, retval, pipe, i; struct mqd_manager *mqd; BUG_ON(!dqm || !q || !qpd || !allocate_vmid); @@ -171,8 +156,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue * q->properties.vmid = qpd->vmid; set = false; - for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm); - pipe = (pipe + 1) % get_pipes_num(dqm)) { + for (i = 0, pipe = dqm->next_pipe_to_allocate; i < get_pipes_num(dqm); + pipe = (pipe + i++) % get_pipes_num(dqm)) { if (dqm->allocated_queues[pipe] != 0) { bit = find_first_bit((unsigned long *)&dqm->allocated_queues[pipe], QUEUES_PER_PIPE); clear_bit(bit, (unsigned long *)&dqm->allocated_queues[pipe]); @@ -238,9 +223,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 
struct qcm_pr retval = -ENOMEM; goto out; } - mqd->acquire_hqd(mqd, q->pipe, q->queue, 0); - retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); - mqd->release_hqd(mqd); + retval = mqd->destroy_mqd(mqd, false, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, q->pipe, q->queue); if (retval != 0) goto