[PATCH 4/4] drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources

2019-02-15 Thread Zhao, Yong
We can directly calculate the SDMA doorbell indexes in the process doorbell
pages from the doorbell_index structure in amdgpu_device, so there is no
need to cache them in kgd2kfd_shared_resources anymore. This reduces the
adaptation needed when new SDMA configurations are introduced.

Change-Id: Ic657799856ed0256f36b01e502ef0cab263b1f49
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 41 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 16 +---
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  4 +-
 3 files changed, 23 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 30e2b371578e..fe1d7368c1e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -131,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-   int i, n;
+   int i;
int last_valid_bit;
 
if (adev->kfd.dev) {
@@ -142,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.gpuvm_size = min(adev->vm_manager.max_pfn
  << AMDGPU_GPU_PAGE_SHIFT,
  AMDGPU_GMC_HOLE_START),
-   .drm_render_minor = adev->ddev->render->index
+   .drm_render_minor = adev->ddev->render->index,
+   .sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
+
};
 
/* this is going to have a few of the MSBs set that we need to
@@ -172,31 +174,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
 
-   if (adev->asic_type < CHIP_VEGA10) {
-   kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
-   return;
-   }
-
-   n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
-
-   for (i = 0; i < n; i += 2) {
-   /* On SOC15 the BIF is involved in routing
-* doorbells using the low 12 bits of the
-* address. Communicate the assignments to
-* KFD. KFD uses two doorbell pages per
-* process in case of 64-bit doorbells so we
-* can use each doorbell assignment twice.
-*/
-   gpu_resources.sdma_doorbell[0][i] =
-   adev->doorbell_index.sdma_engine[0] + (i >> 1);
-   gpu_resources.sdma_doorbell[0][i+1] =
-   adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1);
-   gpu_resources.sdma_doorbell[1][i] =
-   adev->doorbell_index.sdma_engine[1] + (i >> 1);
-   gpu_resources.sdma_doorbell[1][i+1] =
-   adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1);
-   }
-
/* Since SOC15, BIF starts to statically use the
 * lower 12 bits of doorbell addresses for routing
 * based on settings in registers like
@@ -205,10 +182,12 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 * 12 bits of its address has to be outside the range
 * set for SDMA, VCN, and IH blocks.
 */
-   gpu_resources.non_cp_doorbells_start =
-   adev->doorbell_index.first_non_cp;
-   gpu_resources.non_cp_doorbells_end =
-   adev->doorbell_index.last_non_cp;
+   if (adev->asic_type >= CHIP_VEGA10) {
+   gpu_resources.non_cp_doorbells_start =
+   adev->doorbell_index.first_non_cp;
+   gpu_resources.non_cp_doorbells_end =
+   adev->doorbell_index.last_non_cp;
+   }
 
kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8372556b52eb..c6c9530e704e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -134,12 +134,18 @@ static int allocate_doorbell(struct qcm_process_device 
*qpd, struct queue *q)
 */
q->doorbell_id = q->properties.queue_id;
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-   /* For SDMA queues on SOC15, use static doorbell
-* assignments based on the engine and queue.
+   /* For SDMA queues on SOC15 with 

[PATCH 4/4] drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources

2019-02-13 Thread Zhao, Yong
We can directly calculate the SDMA doorbell indexes in the process doorbell
pages from the doorbell_index structure in amdgpu_device, so there is no
need to cache them in kgd2kfd_shared_resources anymore. This reduces the
adaptation needed when new SDMA configurations are introduced.

Change-Id: Ic657799856ed0256f36b01e502ef0cab263b1f49
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 41 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 16 +---
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  4 +-
 3 files changed, 23 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index a8a166fff1e3..88f6f0ae38a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -131,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-   int i, n;
+   int i;
int last_valid_bit;
 
if (adev->kfd.dev) {
@@ -142,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.gpuvm_size = min(adev->vm_manager.max_pfn
  << AMDGPU_GPU_PAGE_SHIFT,
  AMDGPU_GMC_HOLE_START),
-   .drm_render_minor = adev->ddev->render->index
+   .drm_render_minor = adev->ddev->render->index,
+   .sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
+
};
 
/* this is going to have a few of the MSBs set that we need to
@@ -172,31 +174,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
 
-   if (adev->asic_type < CHIP_VEGA10) {
-   kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
-   return;
-   }
-
-   n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
-
-   for (i = 0; i < n; i += 2) {
-   /* On SOC15 the BIF is involved in routing
-* doorbells using the low 12 bits of the
-* address. Communicate the assignments to
-* KFD. KFD uses two doorbell pages per
-* process in case of 64-bit doorbells so we
-* can use each doorbell assignment twice.
-*/
-   gpu_resources.sdma_doorbell[0][i] =
-   adev->doorbell_index.sdma_engine[0] + (i >> 1);
-   gpu_resources.sdma_doorbell[0][i+1] =
-   adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1);
-   gpu_resources.sdma_doorbell[1][i] =
-   adev->doorbell_index.sdma_engine[1] + (i >> 1);
-   gpu_resources.sdma_doorbell[1][i+1] =
-   adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1);
-   }
-
/* Since SOC15, BIF starts to statically use the
 * lower 12 bits of doorbell addresses for routing
 * based on settings in registers like
@@ -205,10 +182,12 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 * 12 bits of its address has to be outside the range
 * set for SDMA, VCN, and IH blocks.
 */
-   gpu_resources.non_cp_doorbells_start =
-   adev->doorbell_index.sdma_engine[0];
-   gpu_resources.non_cp_doorbells_end =
-   adev->doorbell_index.last_non_cp;
+   if (adev->asic_type >= CHIP_VEGA10) {
+   gpu_resources.non_cp_doorbells_start =
+   adev->doorbell_index.sdma_engine[0];
+   gpu_resources.non_cp_doorbells_end =
+   adev->doorbell_index.last_non_cp;
+   }
 
kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8372556b52eb..c6c9530e704e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -134,12 +134,18 @@ static int allocate_doorbell(struct qcm_process_device 
*qpd, struct queue *q)
 */
q->doorbell_id = q->properties.queue_id;
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-   /* For SDMA queues on SOC15, use static doorbell
-* assignments based on the engine and queue.
+   /* For SDMA queues on SOC15