From: tiancyin <[email protected]>

[Why]
On some servers with multi-terabyte system memory, the PCI bus physical
address alignment policy may assign GPUs bus addresses that exceed 44 bits.
With the current 44-bit DMA mask this causes DMA address overflow errors:

[   83.216803] amdgpu 0000:43:00.0: DMA addr 0x0000210b39000000+8388608 overflow (mask fffffffffff, bus limit 0).

[How]
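Enlarge the DMA mask from 44 bits to 48 bits to accommodate larger physical
addresses. The 48-bit mask is selected only on x86 with 64-bit physical
addressing when the boot CPU reports at least 48 physical address bits;
otherwise the mask stays at 44 bits. If setting the 48-bit mask fails, fall
back to 44 bits, and derive need_swiotlb from the mask actually selected.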

Signed-off-by: tiancyin <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 26 +++++++++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 25 ++++++++++++++++++++-----
 2 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index a1f8141f28c9..7efc3880eed8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -21,6 +21,7 @@
  *
  */
 #include <linux/firmware.h>
+#include <linux/processor.h>
 #include <linux/pci.h>
 
 #include <drm/drm_cache.h>
@@ -726,7 +727,7 @@ static int gmc_v11_0_gart_init(struct amdgpu_device *adev)
 
 static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
 {
-       int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
+       int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_mask;
        struct amdgpu_device *adev = ip_block->adev;
 
        adev->mmhub.funcs->init(adev);
@@ -805,13 +806,28 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
         */
        adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
 
-       r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+#if defined CONFIG_X86 && defined CONFIG_PHYS_ADDR_T_64BIT
+       dma_mask = boot_cpu_data.x86_phys_bits >= 48 ? 48 : 44;
+#else
+       dma_mask = 44;
+#endif
+fallback_dma_mask:
+       r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_mask));
        if (r) {
-               dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
-               return r;
+               if (dma_mask > 44) {
+                       dev_notice(
+                               adev->dev,
+                               "amdgpu: %d bit DMA is not available, fallback 
to 44 bit.\n",
+                               dma_mask);
+                       dma_mask = 44;
+                       goto fallback_dma_mask;
+               } else {
+                       dev_warn(adev->dev, "amdgpu: No suitable DMA 
available.\n");
+                       return r;
+               }
        }
 
-       adev->need_swiotlb = drm_need_swiotlb(44);
+       adev->need_swiotlb = drm_need_swiotlb(dma_mask);
 
        r = gmc_v11_0_mc_init(adev);
        if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index f4a19357ccbc..e1dd99e1151f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -21,6 +21,7 @@
  *
  */
 #include <linux/firmware.h>
+#include <linux/processor.h>
 #include <linux/pci.h>
 
 #include <drm/drm_cache.h>
@@ -742,7 +743,7 @@ static int gmc_v12_0_gart_init(struct amdgpu_device *adev)
 
 static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
 {
-       int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
+       int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_mask;
        struct amdgpu_device *adev = ip_block->adev;
 
        adev->mmhub.funcs->init(adev);
@@ -802,13 +803,27 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
         */
        adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
 
-       r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+#if defined CONFIG_X86 && defined CONFIG_PHYS_ADDR_T_64BIT
+       dma_mask = boot_cpu_data.x86_phys_bits >= 48 ? 48 : 44;
+#else
+       dma_mask = 44;
+#endif
+fallback_dma_mask:
+       r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_mask));
        if (r) {
-               printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
-               return r;
+               if (dma_mask > 44) {
+                       printk(KERN_NOTICE
+                              "amdgpu: %d bit DMA is not available, fallback 
to 44 bit.\n",
+                              dma_mask);
+                       dma_mask = 44;
+                       goto fallback_dma_mask;
+               } else {
+                       printk(KERN_WARNING "amdgpu: No suitable DMA 
available.\n");
+                       return r;
+               }
        }
 
-       adev->need_swiotlb = drm_need_swiotlb(44);
+       adev->need_swiotlb = drm_need_swiotlb(dma_mask);
 
        r = gmc_v12_0_mc_init(adev);
        if (r)
-- 
2.34.1

Reply via email to