[PATCH 11/21] drm/amdkfd: allow split HQD on per-queue granularity v4
Update the KGD to KFD interface to allow sharing pipes with queue granularity instead of pipe granularity. This allows for more interesting pipe/queue splits. v2: fix overflow check for res.queue_mask v3: fix shift overflow when setting res.queue_mask v4: fix comment in is_pipeline_enabled() Reviewed-by: Edward O'CallaghanAcked-by: Christian König Signed-off-by: Andres Rodriguez --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 22 - drivers/gpu/drm/amd/amdkfd/kfd_device.c| 4 + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 100 ++--- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 10 +-- .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c| 3 +- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +- drivers/gpu/drm/amd/include/kgd_kfd_interface.h| 17 ++-- drivers/gpu/drm/radeon/radeon_kfd.c| 21 - 9 files changed, 126 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 3200ff9..8fc5aa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -88,28 +88,44 @@ void amdgpu_amdkfd_fini(void) void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) { if (kgd2kfd) adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, adev->pdev, kfd2kgd); } void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { + int i; + int last_valid_bit; if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = 0xFF00, - - .first_compute_pipe = 1, - .compute_pipe_count = 4 - 1, + .num_mec = adev->gfx.mec.num_mec, + .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, + .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe }; + /* this is going to have a few of the MSBs set that we need to +* clear */ + bitmap_complement(gpu_resources.queue_bitmap, + adev->gfx.mec.queue_bitmap, + KGD_MAX_QUEUES); + + /* According to linux/bitmap.h we shouldn't use bitmap_clear if +* nbits is not compile time constant */ + last_valid_bit = adev->gfx.mec.num_mec + * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) + clear_bit(i, gpu_resources.queue_bitmap); + amdgpu_doorbell_get_kfd_info(adev, _resources.doorbell_physical_address, _resources.doorbell_aperture_size, _resources.doorbell_start_offset); kgd2kfd->device_init(adev->kfd, _resources); } } void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3f95f7c..88187bf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -219,20 +219,24 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, return AMD_IOMMU_INV_PRI_RSP_INVALID; } bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources) { unsigned int size; kfd->shared_resources = *gpu_resources; + /* We only use the first MEC */ + if (kfd->shared_resources.num_mec > 1) + kfd->shared_resources.num_mec = 1; + /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; /* * calculate max size of runlist packet. * There can be only 2 packets at once */ size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) + max_num_of_queues_per_device * diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c064dea..5f28720 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -56,35 +56,58 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm, unsigned int sdma_queue_id); static inline enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) { if (type == KFD_QUEUE_TYPE_SDMA)
[PATCH 11/21] drm/amdkfd: allow split HQD on per-queue granularity v4
Update the KGD to KFD interface to allow sharing pipes with queue granularity instead of pipe granularity. This allows for more interesting pipe/queue splits. v2: fix overflow check for res.queue_mask v3: fix shift overflow when setting res.queue_mask v4: fix comment in is_pipeline_enabled() Signed-off-by: Andres Rodriguez--- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 22 - drivers/gpu/drm/amd/amdkfd/kfd_device.c| 4 + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 100 ++--- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 10 +-- .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c| 3 +- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +- drivers/gpu/drm/amd/include/kgd_kfd_interface.h| 17 ++-- drivers/gpu/drm/radeon/radeon_kfd.c| 21 - 9 files changed, 126 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 3200ff9..8fc5aa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -88,28 +88,44 @@ void amdgpu_amdkfd_fini(void) void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) { if (kgd2kfd) adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, adev->pdev, kfd2kgd); } void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { + int i; + int last_valid_bit; if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = 0xFF00, - - .first_compute_pipe = 1, - .compute_pipe_count = 4 - 1, + .num_mec = adev->gfx.mec.num_mec, + .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, + .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe }; + /* this is going to have a few of the MSBs set that we need to +* clear */ + bitmap_complement(gpu_resources.queue_bitmap, + adev->gfx.mec.queue_bitmap, + KGD_MAX_QUEUES); + + /* According to linux/bitmap.h we shouldn't use bitmap_clear if +* nbits is not compile time constant */ + last_valid_bit = adev->gfx.mec.num_mec + * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) + clear_bit(i, gpu_resources.queue_bitmap); + amdgpu_doorbell_get_kfd_info(adev, _resources.doorbell_physical_address, _resources.doorbell_aperture_size, _resources.doorbell_start_offset); kgd2kfd->device_init(adev->kfd, _resources); } } void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3f95f7c..88187bf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -219,20 +219,24 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, return AMD_IOMMU_INV_PRI_RSP_INVALID; } bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources) { unsigned int size; kfd->shared_resources = *gpu_resources; + /* We only use the first MEC */ + if (kfd->shared_resources.num_mec > 1) + kfd->shared_resources.num_mec = 1; + /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; /* * calculate max size of runlist packet. * There can be only 2 packets at once */ size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) + max_num_of_queues_per_device * diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c064dea..5f28720 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -56,35 +56,58 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm, unsigned int sdma_queue_id); static inline enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) { if (type == KFD_QUEUE_TYPE_SDMA) return KFD_MQD_TYPE_SDMA; return KFD_MQD_TYPE_CP; } -unsigned int get_first_pipe(struct