[PATCH 11/21] drm/amdkfd: allow split HQD on per-queue granularity v4

2017-03-06 Thread Andres Rodriguez
Update the KGD to KFD interface to allow sharing pipes with queue
granularity instead of pipe granularity.

This allows for more interesting pipe/queue splits.

v2: fix overflow check for res.queue_mask
v3: fix shift overflow when setting res.queue_mask
v4: fix comment in is_pipeline_enabled()

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  22 -
 drivers/gpu/drm/amd/amdkfd/kfd_device.c|   4 +
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 100 ++---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  10 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c|   3 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |   2 +-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h|  17 ++--
 drivers/gpu/drm/radeon/radeon_kfd.c|  21 -
 9 files changed, 126 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3200ff9..8fc5aa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -88,28 +88,44 @@ void amdgpu_amdkfd_fini(void)
 
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
if (kgd2kfd)
adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
adev->pdev, kfd2kgd);
 }
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
+   int i;
+   int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
-
-   .first_compute_pipe = 1,
-   .compute_pipe_count = 4 - 1,
+   .num_mec = adev->gfx.mec.num_mec,
+   .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+   .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
};
 
+   /* this is going to have a few of the MSBs set that we need to
+* clear */
+   bitmap_complement(gpu_resources.queue_bitmap,
+ adev->gfx.mec.queue_bitmap,
+ KGD_MAX_QUEUES);
+
+   /* According to linux/bitmap.h we shouldn't use bitmap_clear if
+* nbits is not compile time constant */
+   last_valid_bit = adev->gfx.mec.num_mec
+   * adev->gfx.mec.num_pipe_per_mec
+   * adev->gfx.mec.num_queue_per_pipe;
+   for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+   clear_bit(i, gpu_resources.queue_bitmap);
+
amdgpu_doorbell_get_kfd_info(adev,
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
 
kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
 }
 
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3f95f7c..88187bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -219,20 +219,24 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
return AMD_IOMMU_INV_PRI_RSP_INVALID;
 }
 
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 const struct kgd2kfd_shared_resources *gpu_resources)
 {
unsigned int size;
 
kfd->shared_resources = *gpu_resources;
 
+   /* We only use the first MEC */
+   if (kfd->shared_resources.num_mec > 1)
+   kfd->shared_resources.num_mec = 1;
+
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
 
/*
 * calculate max size of runlist packet.
 * There can be only 2 packets at once
 */
size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
max_num_of_queues_per_device *
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c064dea..5f28720 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -56,35 +56,58 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm,
unsigned int sdma_queue_id);
 
 static inline
 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
 {
if (type == KFD_QUEUE_TYPE_SDMA)

[PATCH 11/21] drm/amdkfd: allow split HQD on per-queue granularity v4

2017-03-02 Thread Andres Rodriguez
Update the KGD to KFD interface to allow sharing pipes with queue
granularity instead of pipe granularity.

This allows for more interesting pipe/queue splits.

v2: fix overflow check for res.queue_mask
v3: fix shift overflow when setting res.queue_mask
v4: fix comment in is_pipeline_enabled()

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  22 -
 drivers/gpu/drm/amd/amdkfd/kfd_device.c|   4 +
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 100 ++---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  10 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c|   3 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |   2 +-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h|  17 ++--
 drivers/gpu/drm/radeon/radeon_kfd.c|  21 -
 9 files changed, 126 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3200ff9..8fc5aa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -88,28 +88,44 @@ void amdgpu_amdkfd_fini(void)
 
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
if (kgd2kfd)
adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
adev->pdev, kfd2kgd);
 }
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
+   int i;
+   int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
-
-   .first_compute_pipe = 1,
-   .compute_pipe_count = 4 - 1,
+   .num_mec = adev->gfx.mec.num_mec,
+   .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+   .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
};
 
+   /* this is going to have a few of the MSBs set that we need to
+* clear */
+   bitmap_complement(gpu_resources.queue_bitmap,
+ adev->gfx.mec.queue_bitmap,
+ KGD_MAX_QUEUES);
+
+   /* According to linux/bitmap.h we shouldn't use bitmap_clear if
+* nbits is not compile time constant */
+   last_valid_bit = adev->gfx.mec.num_mec
+   * adev->gfx.mec.num_pipe_per_mec
+   * adev->gfx.mec.num_queue_per_pipe;
+   for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+   clear_bit(i, gpu_resources.queue_bitmap);
+
amdgpu_doorbell_get_kfd_info(adev,
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
 
kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
 }
 
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3f95f7c..88187bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -219,20 +219,24 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
return AMD_IOMMU_INV_PRI_RSP_INVALID;
 }
 
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 const struct kgd2kfd_shared_resources *gpu_resources)
 {
unsigned int size;
 
kfd->shared_resources = *gpu_resources;
 
+   /* We only use the first MEC */
+   if (kfd->shared_resources.num_mec > 1)
+   kfd->shared_resources.num_mec = 1;
+
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
 
/*
 * calculate max size of runlist packet.
 * There can be only 2 packets at once
 */
size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
max_num_of_queues_per_device *
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c064dea..5f28720 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -56,35 +56,58 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm,
unsigned int sdma_queue_id);
 
 static inline
 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
 {
if (type == KFD_QUEUE_TYPE_SDMA)
return KFD_MQD_TYPE_SDMA;
return KFD_MQD_TYPE_CP;
 }
 
-unsigned int get_first_pipe(struct