[PATCH 81/83] hsa/radeon: Eliminating all direct register accesses

2014-07-10 Thread Oded Gabbay
From: Ben Goz 

This patch eliminates all direct register accesses from KFD
and eliminates the use of shared locks between KFD and radeon.

The single exception is the doorbells, which are used by
both drivers. However, because they are located
in separate PCI BAR pages, the danger of sharing registers
between the drivers is minimal.

Having said that, we are planning to move the doorbells
to radeon as well.

Signed-off-by: Ben Goz 
Signed-off-by: Oded Gabbay 
---
 drivers/gpu/hsa/radeon/Makefile   |   2 +-
 drivers/gpu/hsa/radeon/kfd_device.c   |   2 -
 drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 113 +++---
 drivers/gpu/hsa/radeon/kfd_kernel_queue.c |  12 +-
 drivers/gpu/hsa/radeon/kfd_mqd_manager.c  | 175 +-
 drivers/gpu/hsa/radeon/kfd_mqd_manager.h  |  37 +++--
 drivers/gpu/hsa/radeon/kfd_priv.h |  18 ---
 drivers/gpu/hsa/radeon/kfd_registers.c|  50 ---
 8 files changed, 54 insertions(+), 355 deletions(-)
 delete mode 100644 drivers/gpu/hsa/radeon/kfd_registers.c

diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index b5f05b4..d838bce 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -4,7 +4,7 @@
 
 radeon_kfd-y   := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_pasid.o kfd_topology.o kfd_process.o \
-   kfd_doorbell.o kfd_registers.o kfd_vidmem.o \
+   kfd_doorbell.o kfd_vidmem.o \
kfd_interrupt.o kfd_aperture.o kfd_queue.o kfd_mqd_manager.o \
kfd_kernel_queue.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o
diff --git a/drivers/gpu/hsa/radeon/kfd_device.c 
b/drivers/gpu/hsa/radeon/kfd_device.c
index 30558c9..0ff2241 100644
--- a/drivers/gpu/hsa/radeon/kfd_device.c
+++ b/drivers/gpu/hsa/radeon/kfd_device.c
@@ -157,8 +157,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 {
kfd->shared_resources = *gpu_resources;
 
-   kfd->regs = gpu_resources->mmio_registers;
-
radeon_kfd_doorbell_init(kfd);
 
if (radeon_kfd_interrupt_init(kfd))
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c 
b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
index 12b8b33..3eb5db3 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -112,30 +112,15 @@ static void init_process_memory(struct 
device_queue_manager *dqm, struct qcm_pro
 
 static void program_sh_mem_settings(struct device_queue_manager *dqm, struct 
qcm_process_device *qpd)
 {
-   struct mqd_manager *mqd;
-
-   BUG_ON(qpd->vmid < KFD_VMID_START_OFFSET);
-
-   mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
-   if (mqd == NULL)
-   return;
-
-   mqd->acquire_hqd(mqd, 0, 0, qpd->vmid);
-
-   WRITE_REG(dqm->dev, SH_MEM_CONFIG, qpd->sh_mem_config);
-
-   WRITE_REG(dqm->dev, SH_MEM_APE1_BASE, qpd->sh_mem_ape1_base);
-   WRITE_REG(dqm->dev, SH_MEM_APE1_LIMIT, qpd->sh_mem_ape1_limit);
-   WRITE_REG(dqm->dev, SH_MEM_BASES, qpd->sh_mem_bases);
-
-   mqd->release_hqd(mqd);
+   return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid, 
qpd->sh_mem_config,
+   qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, 
qpd->sh_mem_bases);
 }
 
 static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue 
*q,
struct qcm_process_device *qpd, int *allocate_vmid)
 {
bool set, is_new_vmid;
-   int bit, retval, pipe;
+   int bit, retval, pipe, i;
struct mqd_manager *mqd;
 
BUG_ON(!dqm || !q || !qpd || !allocate_vmid);
@@ -171,8 +156,8 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm, struct queue *
q->properties.vmid = qpd->vmid;
 
set = false;
-   for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm);
-   pipe = (pipe + 1) % get_pipes_num(dqm)) {
+   for (i = 0, pipe = dqm->next_pipe_to_allocate; i < get_pipes_num(dqm);
+   pipe = (pipe + i++) % get_pipes_num(dqm)) {
if (dqm->allocated_queues[pipe] != 0) {
bit = find_first_bit((unsigned long 
*)&dqm->allocated_queues[pipe], QUEUES_PER_PIPE);
clear_bit(bit, (unsigned long 
*)&dqm->allocated_queues[pipe]);
@@ -238,9 +223,7 @@ static int destroy_queue_nocpsch(struct 
device_queue_manager *dqm, struct qcm_pr
retval = -ENOMEM;
goto out;
}
-   mqd->acquire_hqd(mqd, q->pipe, q->queue, 0);
-   retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT, 
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
-   mqd->release_hqd(mqd);
+   retval = mqd->destroy_mqd(mqd, false, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, 
q->pipe, q->queue);
if (retval != 0)
goto out;
 
@@ 

[PATCH 81/83] hsa/radeon: Eliminating all direct register accesses

2014-07-10 Thread Oded Gabbay
From: Ben Goz ben@amd.com

This patch eliminates all direct register accesses from KFD
and eliminates the use of shared locks between KFD and radeon.

The single exception is the doorbells, which are used by
both drivers. However, because they are located
in separate PCI BAR pages, the danger of sharing registers
between the drivers is minimal.

Having said that, we are planning to move the doorbells
to radeon as well.

Signed-off-by: Ben Goz ben@amd.com
Signed-off-by: Oded Gabbay oded.gab...@amd.com
---
 drivers/gpu/hsa/radeon/Makefile   |   2 +-
 drivers/gpu/hsa/radeon/kfd_device.c   |   2 -
 drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 113 +++---
 drivers/gpu/hsa/radeon/kfd_kernel_queue.c |  12 +-
 drivers/gpu/hsa/radeon/kfd_mqd_manager.c  | 175 +-
 drivers/gpu/hsa/radeon/kfd_mqd_manager.h  |  37 +++--
 drivers/gpu/hsa/radeon/kfd_priv.h |  18 ---
 drivers/gpu/hsa/radeon/kfd_registers.c|  50 ---
 8 files changed, 54 insertions(+), 355 deletions(-)
 delete mode 100644 drivers/gpu/hsa/radeon/kfd_registers.c

diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index b5f05b4..d838bce 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -4,7 +4,7 @@
 
 radeon_kfd-y   := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_pasid.o kfd_topology.o kfd_process.o \
-   kfd_doorbell.o kfd_registers.o kfd_vidmem.o \
+   kfd_doorbell.o kfd_vidmem.o \
kfd_interrupt.o kfd_aperture.o kfd_queue.o kfd_mqd_manager.o \
kfd_kernel_queue.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o
diff --git a/drivers/gpu/hsa/radeon/kfd_device.c 
b/drivers/gpu/hsa/radeon/kfd_device.c
index 30558c9..0ff2241 100644
--- a/drivers/gpu/hsa/radeon/kfd_device.c
+++ b/drivers/gpu/hsa/radeon/kfd_device.c
@@ -157,8 +157,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 {
kfd->shared_resources = *gpu_resources;
 
-   kfd->regs = gpu_resources->mmio_registers;
-
radeon_kfd_doorbell_init(kfd);
 
if (radeon_kfd_interrupt_init(kfd))
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c 
b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
index 12b8b33..3eb5db3 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -112,30 +112,15 @@ static void init_process_memory(struct 
device_queue_manager *dqm, struct qcm_pro
 
 static void program_sh_mem_settings(struct device_queue_manager *dqm, struct 
qcm_process_device *qpd)
 {
-   struct mqd_manager *mqd;
-
-   BUG_ON(qpd->vmid < KFD_VMID_START_OFFSET);
-
-   mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
-   if (mqd == NULL)
-   return;
-
-   mqd->acquire_hqd(mqd, 0, 0, qpd->vmid);
-
-   WRITE_REG(dqm->dev, SH_MEM_CONFIG, qpd->sh_mem_config);
-
-   WRITE_REG(dqm->dev, SH_MEM_APE1_BASE, qpd->sh_mem_ape1_base);
-   WRITE_REG(dqm->dev, SH_MEM_APE1_LIMIT, qpd->sh_mem_ape1_limit);
-   WRITE_REG(dqm->dev, SH_MEM_BASES, qpd->sh_mem_bases);
-
-   mqd->release_hqd(mqd);
+   return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid, 
qpd->sh_mem_config,
+   qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, 
qpd->sh_mem_bases);
 }
 
 static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue 
*q,
struct qcm_process_device *qpd, int *allocate_vmid)
 {
bool set, is_new_vmid;
-   int bit, retval, pipe;
+   int bit, retval, pipe, i;
struct mqd_manager *mqd;
 
BUG_ON(!dqm || !q || !qpd || !allocate_vmid);
@@ -171,8 +156,8 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm, struct queue *
q->properties.vmid = qpd->vmid;
 
set = false;
-   for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm);
-   pipe = (pipe + 1) % get_pipes_num(dqm)) {
+   for (i = 0, pipe = dqm->next_pipe_to_allocate; i < get_pipes_num(dqm);
+   pipe = (pipe + i++) % get_pipes_num(dqm)) {
if (dqm->allocated_queues[pipe] != 0) {
bit = find_first_bit((unsigned long 
*)&dqm->allocated_queues[pipe], QUEUES_PER_PIPE);
clear_bit(bit, (unsigned long 
*)&dqm->allocated_queues[pipe]);
@@ -238,9 +223,7 @@ static int destroy_queue_nocpsch(struct 
device_queue_manager *dqm, struct qcm_pr
retval = -ENOMEM;
goto out;
}
-   mqd->acquire_hqd(mqd, q->pipe, q->queue, 0);
-   retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT, 
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
-   mqd->release_hqd(mqd);
+   retval = mqd->destroy_mqd(mqd, false, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, 
q->pipe, q->queue);
if (retval != 0)
goto