When the BIOS is configured in NPS4 mode, we need to find the closest
NUMA node when creating the topology IO link between the CPU and the
GPU, if the PCI driver doesn't set it.

Signed-off-by: Eric Huang <jinhuieric.hu...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 95 ++++++++++++++++++++++++++-
 1 file changed, 93 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 38d45711675f..58c6738de774 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1759,6 +1759,91 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
        return 0;
 }
 
+#ifdef CONFIG_ACPI_NUMA
+/* kfd_find_numa_node_in_srat - Set the GPU's NUMA node from the ACPI SRAT
+ *     @kdev: KFD device; a Generic Affinity entry matching its PCI
+ *     segment/BDF supplies the node passed to set_dev_node()
+ */
+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
+{
+       struct acpi_table_header *table_header = NULL;
+       struct acpi_subtable_header *sub_header = NULL;
+       unsigned long table_end, subtable_len;
+       u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+                       pci_dev_id(kdev->pdev);
+       u32 bdf;
+       acpi_status status;
+       struct acpi_srat_cpu_affinity *cpu;
+       struct acpi_srat_generic_affinity *gpu;
+       int pxm = 0, max_pxm = 0, numa_node = NUMA_NO_NODE;
+       bool found = false;
+
+       /* Fetch the SRAT table from ACPI */
+       status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
+       if (status == AE_NOT_FOUND) {
+               pr_warn("SRAT table not found\n");
+               return;
+       } else if (ACPI_FAILURE(status)) {
+               pr_err("SRAT table error: %s\n",
+                      acpi_format_exception(status));
+               return;
+       }
+
+       table_end = (unsigned long)table_header + table_header->length;
+       sub_header = (struct acpi_subtable_header *)
+               ((unsigned long)table_header + sizeof(struct acpi_table_srat));
+
+       /* Only read a subtable header that lies fully inside the table. */
+       while ((unsigned long)sub_header + sizeof(*sub_header) <= table_end) {
+               subtable_len = sub_header->length;
+               /* A zero length would make this loop spin forever. */
+               if (subtable_len == 0) {
+                       pr_err("SRAT invalid zero length\n");
+                       break;
+               }
+               /* Stop at a subtable that claims to run past the table. */
+               if ((unsigned long)sub_header + subtable_len > table_end)
+                       break;
+
+               switch (sub_header->type) {
+               case ACPI_SRAT_TYPE_CPU_AFFINITY:
+                       cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+                       pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+                                       cpu->proximity_domain_lo;
+                       if (pxm > max_pxm)
+                               max_pxm = pxm;
+                       break;
+               case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+                       gpu = (struct acpi_srat_generic_affinity *)sub_header;
+                       bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
+                                       *((u16 *)(&gpu->device_handle[2]));
+                       if (bdf == pci_id) {
+                               found = true;
+                               numa_node = pxm_to_node(gpu->proximity_domain);
+                       }
+                       break;
+               default:
+                       break;
+               }
+
+               if (found)
+                       break;
+
+               sub_header = (struct acpi_subtable_header *)
+                               ((unsigned long)sub_header + subtable_len);
+       }
+
+       acpi_put_table(table_header);
+
+       /* max_pxm is a proximity domain; map it to a node before comparing */
+       if (found && (numa_node < 0 || numa_node > pxm_to_node(max_pxm)))
+               numa_node = 0;
+
+       if (numa_node != NUMA_NO_NODE)
+               set_dev_node(&kdev->pdev->dev, numa_node);
+}
+#endif
+
 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
  * to its NUMA node
  *     @avail_size: Available size in the memory
@@ -1804,10 +1889,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
        }
 
        sub_type_hdr->proximity_domain_from = proximity_domain;
-#ifdef CONFIG_NUMA
+
+#ifdef CONFIG_ACPI_NUMA
        if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+               kfd_find_numa_node_in_srat(kdev);
+#endif
+#ifdef CONFIG_NUMA
+       if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
                sub_type_hdr->proximity_domain_to = 0;
-       else
+               set_dev_node(&kdev->pdev->dev, 0);
+       } else
                sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
 #else
        sub_type_hdr->proximity_domain_to = 0;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to