From: Oak Zeng <oak.z...@amd.com>

The existing QUEUE_TYPE_SDMA denotes PCIe-optimized SDMA queues.
Introduce a new QUEUE_TYPE_SDMA_XGMI, which is optimized
for non-PCIe transfers such as XGMI.

Signed-off-by: Oak Zeng <oak.z...@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>
Signed-off-by: Felix Kuehling <felix.kuehl...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_device.c       |  15 +++
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 123 +++++++++++++-----
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |   3 +
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |   2 +
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c  |   2 +
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   4 +-
 .../amd/amdkfd/kfd_process_queue_manager.c    |  10 +-
 include/uapi/linux/kfd_ioctl.h                |   7 +-
 10 files changed, 132 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index d795e5018270..c731126ada22 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
                q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
        else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
                q_properties->type = KFD_QUEUE_TYPE_SDMA;
+       else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
+               q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
        else
                return -ENOTSUPP;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 8202a5db3a35..1368b41cb92b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {
        .needs_iommu_device = true,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {
        .needs_iommu_device = true,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {
        .needs_iommu_device = true,
        .needs_pci_atomics = true,
        .num_sdma_engines = 1,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 #endif
@@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -218,6 +228,7 @@ static const struct kfd_device_info polaris12_device_info = {
        .needs_iommu_device = false,
        .needs_pci_atomics = true,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -234,6 +245,7 @@ static const struct kfd_device_info vega10_device_info = {
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -250,6 +262,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -266,6 +279,7 @@ static const struct kfd_device_info vega12_device_info = {
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
 
@@ -282,6 +296,7 @@ static const struct kfd_device_info vega20_device_info = {
        .needs_iommu_device = false,
        .needs_pci_atomics = false,
        .num_sdma_engines = 2,
+       .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 8,
 };
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d41045d3fc3a..1562590d837e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
                                        struct qcm_process_device *qpd);
 
 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
-                               unsigned int sdma_queue_id);
+                               struct queue *q);
 
 static void kfd_process_hw_exception(struct work_struct *work);
 
 static inline
 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
 {
-       if (type == KFD_QUEUE_TYPE_SDMA)
+       if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
                return KFD_MQD_TYPE_SDMA;
        return KFD_MQD_TYPE_CP;
 }
@@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
        return dqm->dev->device_info->num_sdma_engines;
 }
 
+static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
+{
+       return dqm->dev->device_info->num_xgmi_sdma_engines;
+}
+
 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
 {
        return dqm->dev->device_info->num_sdma_engines
                        * dqm->dev->device_info->num_sdma_queues_per_engine;
 }
 
+unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
+{
+       return dqm->dev->device_info->num_xgmi_sdma_engines
+                       * dqm->dev->device_info->num_sdma_queues_per_engine;
+}
+
 void program_sh_mem_settings(struct device_queue_manager *dqm,
                                        struct qcm_process_device *qpd)
 {
@@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
                 * preserve the user mode ABI.
                 */
                q->doorbell_id = q->properties.queue_id;
-       } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+       } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+                       q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
                /* For SDMA queues on SOC15 with 8-byte doorbell, use static
                 * doorbell assignments based on the engine and queue id.
                 * The doobell index distance between RLC (2*i) and (2*i+1)
@@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
        struct kfd_dev *dev = qpd->dqm->dev;
 
        if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
-           q->properties.type == KFD_QUEUE_TYPE_SDMA)
+           q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+           q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
                return;
 
        old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
@@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 
        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
                retval = create_compute_queue_nocpsch(dqm, q, qpd);
-       else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+       else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+                       q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
                retval = create_sdma_queue_nocpsch(dqm, q, qpd);
        else
                retval = -EINVAL;
@@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 
        if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
                dqm->sdma_queue_count++;
+       else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+               dqm->xgmi_sdma_queue_count++;
 
        /*
         * Unconditionally increment this counter, regardless of the queue's
@@ -430,7 +446,10 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
                deallocate_hqd(dqm, q);
        } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
                dqm->sdma_queue_count--;
-               deallocate_sdma_queue(dqm, q->sdma_id);
+               deallocate_sdma_queue(dqm, q);
+       } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+               dqm->xgmi_sdma_queue_count--;
+               deallocate_sdma_queue(dqm, q);
        } else {
                pr_debug("q->properties.type %d is invalid\n",
                                q->properties.type);
@@ -521,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
                }
        } else if (prev_active &&
                   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
-                   q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
+                   q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+                   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
                retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
                                KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
                                KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
@@ -548,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
                retval = map_queues_cpsch(dqm);
        else if (q->properties.is_active &&
                 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
-                 q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
+                 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
                if (WARN(q->process->mm != current->mm,
                         "should only run in user thread"))
                        retval = -EFAULT;
@@ -840,6 +861,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
        INIT_LIST_HEAD(&dqm->queues);
        dqm->queue_count = dqm->next_pipe_to_allocate = 0;
        dqm->sdma_queue_count = 0;
+       dqm->xgmi_sdma_queue_count = 0;
 
        for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
                int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -852,6 +874,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
 
        dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
        dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
+       dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
 
        return 0;
 }
@@ -886,17 +909,34 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
 {
        int bit;
 
-       if (dqm->sdma_bitmap == 0)
-               return -ENOMEM;
-
-       bit = __ffs64(dqm->sdma_bitmap);
-       dqm->sdma_bitmap &= ~(1ULL << bit);
-       q->sdma_id = bit;
-
-       q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
-       q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
+       if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+               if (dqm->sdma_bitmap == 0)
+                       return -ENOMEM;
+               bit = __ffs64(dqm->sdma_bitmap);
+               dqm->sdma_bitmap &= ~(1ULL << bit);
+               q->sdma_id = bit;
+               q->properties.sdma_engine_id = q->sdma_id %
+                               get_num_sdma_engines(dqm);
+               q->properties.sdma_queue_id = q->sdma_id /
+                               get_num_sdma_engines(dqm);
+       } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+               if (dqm->xgmi_sdma_bitmap == 0)
+                       return -ENOMEM;
+               bit = __ffs64(dqm->xgmi_sdma_bitmap);
+               dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
+               q->sdma_id = bit;
+               /* sdma_engine_id is sdma id including
+                * both PCIe-optimized SDMAs and XGMI-
+                * optimized SDMAs. The calculation below
+                * assumes the first N engines are always
+                * PCIe-optimized ones
+                */
+               q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
+                               q->sdma_id % get_num_xgmi_sdma_engines(dqm);
+               q->properties.sdma_queue_id = q->sdma_id /
+                               get_num_xgmi_sdma_engines(dqm);
+       }
 
-       pr_debug("SDMA id is:    %d\n", q->sdma_id);
        pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
        pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
 
@@ -904,11 +944,17 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
 }
 
 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
-                               unsigned int sdma_id)
+                               struct queue *q)
 {
-       if (sdma_id >= get_num_sdma_queues(dqm))
-               return;
-       dqm->sdma_bitmap |= (1ULL << sdma_id);
+       if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+               if (q->sdma_id >= get_num_sdma_queues(dqm))
+                       return;
+               dqm->sdma_bitmap |= (1ULL << q->sdma_id);
+       } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+               if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
+                       return;
+               dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
+       }
 }
 
 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
@@ -946,7 +992,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 out_deallocate_doorbell:
        deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
-       deallocate_sdma_queue(dqm, q->sdma_id);
+       deallocate_sdma_queue(dqm, q);
 
        return retval;
 }
@@ -1004,8 +1050,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
        INIT_LIST_HEAD(&dqm->queues);
        dqm->queue_count = dqm->processes_count = 0;
        dqm->sdma_queue_count = 0;
+       dqm->xgmi_sdma_queue_count = 0;
        dqm->active_runlist = false;
        dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
+       dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
 
        INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
 
@@ -1127,7 +1175,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
                goto out;
        }
 
-       if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+       if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+               q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
                retval = allocate_sdma_queue(dqm, q);
                if (retval)
                        goto out;
@@ -1167,6 +1216,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
        if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
                dqm->sdma_queue_count++;
+       else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+               dqm->xgmi_sdma_queue_count++;
        /*
         * Unconditionally increment this counter, regardless of the queue's
         * type or whether the queue is active.
@@ -1182,8 +1233,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 out_deallocate_doorbell:
        deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
-       if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-               deallocate_sdma_queue(dqm, q->sdma_id);
+       if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+               q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+               deallocate_sdma_queue(dqm, q);
 out:
        return retval;
 }
@@ -1216,7 +1268,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm)
 {
        int i, retval = 0;
 
-       for (i = 0; i < dqm->dev->device_info->num_sdma_engines; i++) {
+       for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
+               dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
                retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
                        KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
                if (retval)
@@ -1258,10 +1311,10 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
        if (!dqm->active_runlist)
                return retval;
 
-       pr_debug("Before destroying queues, sdma queue count is : %u\n",
-               dqm->sdma_queue_count);
+       pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
+               dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
 
-       if (dqm->sdma_queue_count > 0)
+       if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
                unmap_sdma_queues(dqm);
 
        retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
@@ -1333,7 +1386,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
        if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
                dqm->sdma_queue_count--;
-               deallocate_sdma_queue(dqm, q->sdma_id);
+               deallocate_sdma_queue(dqm, q);
+       } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+               dqm->xgmi_sdma_queue_count--;
+               deallocate_sdma_queue(dqm, q);
        }
 
        list_del(&q->list);
@@ -1550,7 +1606,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
        list_for_each_entry(q, &qpd->queues_list, list) {
                if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
                        dqm->sdma_queue_count--;
-                       deallocate_sdma_queue(dqm, q->sdma_id);
+                       deallocate_sdma_queue(dqm, q);
+               } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+                       dqm->xgmi_sdma_queue_count--;
+                       deallocate_sdma_queue(dqm, q);
                }
 
                if (q->properties.is_active)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 3742fd340ec3..88b4c007696e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -181,10 +181,12 @@ struct device_queue_manager {
        unsigned int            processes_count;
        unsigned int            queue_count;
        unsigned int            sdma_queue_count;
+       unsigned int            xgmi_sdma_queue_count;
        unsigned int            total_queue_count;
        unsigned int            next_pipe_to_allocate;
        unsigned int            *allocated_queues;
        uint64_t                sdma_bitmap;
+       uint64_t                xgmi_sdma_bitmap;
        unsigned int            vmid_bitmap;
        uint64_t                pipelines_addr;
        struct kfd_mem_obj      *pipeline_mem;
@@ -216,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm);
 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
+unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
 
 static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 33830b1a5a54..604570bea6bd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -175,6 +175,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
                        queue_type__mes_map_queues__debug_interface_queue_vi;
                break;
        case KFD_QUEUE_TYPE_SDMA:
+       case KFD_QUEUE_TYPE_SDMA_XGMI:
                packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
                                engine_sel__mes_map_queues__sdma0_vi;
                use_static = false; /* no static queues under SDMA */
@@ -221,6 +222,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
                        engine_sel__mes_unmap_queues__compute;
                break;
        case KFD_QUEUE_TYPE_SDMA:
+       case KFD_QUEUE_TYPE_SDMA_XGMI:
                packet->bitfields2.engine_sel =
                        engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
                break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
index bf20c6d32ef3..3cdb19826927 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
@@ -212,6 +212,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
                        queue_type__mes_map_queues__debug_interface_queue_vi;
                break;
        case KFD_QUEUE_TYPE_SDMA:
+       case KFD_QUEUE_TYPE_SDMA_XGMI:
                packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
                                engine_sel__mes_map_queues__sdma0_vi;
                use_static = false; /* no static queues under SDMA */
@@ -258,6 +259,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
                        engine_sel__mes_unmap_queues__compute;
                break;
        case KFD_QUEUE_TYPE_SDMA:
+       case KFD_QUEUE_TYPE_SDMA_XGMI:
                packet->bitfields2.engine_sel =
                        engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
                break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 045a229436a0..077c47fd4fee 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 
        process_count = pm->dqm->processes_count;
        queue_count = pm->dqm->queue_count;
-       compute_queue_count = queue_count - pm->dqm->sdma_queue_count;
+       compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
+                               pm->dqm->xgmi_sdma_queue_count;
 
        /* check if there is over subscription
         * Note: the arbitration between the number of VMIDs and
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 87328c96b0f1..c8925b7b6c46 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -188,6 +188,7 @@ struct kfd_device_info {
        bool needs_iommu_device;
        bool needs_pci_atomics;
        unsigned int num_sdma_engines;
+       unsigned int num_xgmi_sdma_engines;
        unsigned int num_sdma_queues_per_engine;
 };
 
@@ -329,7 +330,8 @@ enum kfd_queue_type  {
        KFD_QUEUE_TYPE_COMPUTE,
        KFD_QUEUE_TYPE_SDMA,
        KFD_QUEUE_TYPE_HIQ,
-       KFD_QUEUE_TYPE_DIQ
+       KFD_QUEUE_TYPE_DIQ,
+       KFD_QUEUE_TYPE_SDMA_XGMI
 };
 
 enum kfd_queue_format {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index f18d9cdf9aac..e652e25ede75 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -186,8 +186,13 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 
        switch (type) {
        case KFD_QUEUE_TYPE_SDMA:
-               if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) {
-                       pr_err("Over-subscription is not allowed for SDMA.\n");
+       case KFD_QUEUE_TYPE_SDMA_XGMI:
+               if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count
+                       >= get_num_sdma_queues(dev->dqm)) ||
+                       (type == KFD_QUEUE_TYPE_SDMA_XGMI &&
+                       dev->dqm->xgmi_sdma_queue_count
+                       >= get_num_xgmi_sdma_queues(dev->dqm))) {
+                       pr_debug("Over-subscription is not allowed for SDMA.\n");
                        retval = -EPERM;
                        goto err_create_queue;
                }
@@ -446,6 +451,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
                        q = pqn->q;
                        switch (q->properties.type) {
                        case KFD_QUEUE_TYPE_SDMA:
+                       case KFD_QUEUE_TYPE_SDMA_XGMI:
                                seq_printf(m, "  SDMA queue on device %x\n",
                                           q->device->id);
                                mqd_type = KFD_MQD_TYPE_SDMA;
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 1e7d5f3376b0..20917c59f39c 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -35,9 +35,10 @@ struct kfd_ioctl_get_version_args {
 };
 
 /* For kfd_ioctl_create_queue_args.queue_type. */
-#define KFD_IOC_QUEUE_TYPE_COMPUTE     0
-#define KFD_IOC_QUEUE_TYPE_SDMA                1
-#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 2
+#define KFD_IOC_QUEUE_TYPE_COMPUTE             0x0
+#define KFD_IOC_QUEUE_TYPE_SDMA                        0x1
+#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL         0x2
+#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI           0x3
 
 #define KFD_MAX_QUEUE_PERCENTAGE       100
 #define KFD_MAX_QUEUE_PRIORITY         15
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to