[gem5-dev] [M] Change in gem5/gem5[develop]: configs,dev-amdgpu: GPUFS MI200/gfx90a support

2023-05-25 Thread Matthew Poremba (Gerrit) via gem5-dev
Matthew Poremba has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/70317?usp=email )


Change subject: configs,dev-amdgpu: GPUFS MI200/gfx90a support
..

configs,dev-amdgpu: GPUFS MI200/gfx90a support

Add support for an MI200-like device. This includes adding PCI IDs and new
MMIOs for the device, a different MAP_PROCESS packet, and a different
calculation for the number of VGPRs.

Change-Id: I0fb7b3ad928826beaa5386d52a94ba504369cb0d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70317
Reviewed-by: Jason Lowe-Power 
Maintainer: Jason Lowe-Power 
Tested-by: kokoro 
---
M configs/example/gpufs/runfs.py
M configs/example/gpufs/system/amdgpu.py
M configs/example/gpufs/system/system.py
M src/dev/amdgpu/amdgpu_device.cc
M src/dev/amdgpu/amdgpu_device.hh
M src/dev/amdgpu/amdgpu_nbio.cc
M src/dev/amdgpu/amdgpu_nbio.hh
M src/dev/amdgpu/amdgpu_vm.hh
M src/dev/amdgpu/pm4_defines.hh
M src/dev/amdgpu/pm4_packet_processor.cc
M src/dev/amdgpu/pm4_packet_processor.hh
M src/gpu-compute/GPU.py
M src/gpu-compute/gpu_command_processor.cc
M src/gpu-compute/hsa_queue_entry.hh
14 files changed, 173 insertions(+), 27 deletions(-)

Approvals:
  Jason Lowe-Power: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py
index 4c90601..f8ef70d 100644
--- a/configs/example/gpufs/runfs.py
+++ b/configs/example/gpufs/runfs.py
@@ -132,8 +132,9 @@
 parser.add_argument(
 "--gpu-device",
 default="Vega10",
-choices=["Vega10", "MI100"],
-help="GPU model to run: Vega10 (gfx900) or MI100 (gfx908)",
+choices=["Vega10", "MI100", "MI200"],
+help="GPU model to run: Vega10 (gfx900), MI100 (gfx908), or "
+"MI200 (gfx90a)",
 )


diff --git a/configs/example/gpufs/system/amdgpu.py b/configs/example/gpufs/system/amdgpu.py
index 5f98b55..9697e50 100644
--- a/configs/example/gpufs/system/amdgpu.py
+++ b/configs/example/gpufs/system/amdgpu.py
@@ -177,6 +177,10 @@
 system.pc.south_bridge.gpu.DeviceID = 0x738C
 system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002
 system.pc.south_bridge.gpu.SubsystemID = 0x0C34
+elif args.gpu_device == "MI200":
+system.pc.south_bridge.gpu.DeviceID = 0x740F
+system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002
+system.pc.south_bridge.gpu.SubsystemID = 0x0C34
 elif args.gpu_device == "Vega10":
 system.pc.south_bridge.gpu.DeviceID = 0x6863
 else:
diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py
index 90c5c01..263ffc0 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -152,6 +152,16 @@
 0x7D000,
 ]
 sdma_sizes = [0x1000] * 8
+elif args.gpu_device == "MI200":
+num_sdmas = 5
+sdma_bases = [
+0x4980,
+0x6180,
+0x78000,
+0x79000,
+0x7A000,
+]
+sdma_sizes = [0x1000] * 5
 else:
 m5.util.panic(f"Unknown GPU device {args.gpu_device}")

diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index 7037e6f..3260d05 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -115,7 +115,7 @@
 sdmaFunc.insert({0x10b, ::setPageDoorbellOffsetLo});
 sdmaFunc.insert({0xe0, ::setPageSize});
 sdmaFunc.insert({0x113, ::setPageWptrLo});
-} else if (p.device_name == "MI100") {
+} else if (p.device_name == "MI100" || p.device_name == "MI200") {
 sdmaFunc.insert({0xd9, ::setPageBaseLo});
 sdmaFunc.insert({0xe1, ::setPageRptrLo});
 sdmaFunc.insert({0xe0, ::setPageRptrHi});
@@ -144,10 +144,19 @@
 if (p.device_name == "Vega10") {
 setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24);
 setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24);
+gfx_version = GfxVersion::gfx900;
 } else if (p.device_name == "MI100") {
 setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24);
 setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24);
 setRegVal(MI100_MEM_SIZE_REG, 0x3ff0); // 16GB of memory
+gfx_version = GfxVersion::gfx908;
+} else if (p.device_name == "MI200") {
+// This device can have either 64GB or 128GB of device memory.
+// This limits to 16GB for simulation.
+setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
+setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
+setRegVal(MI200_MEM_SIZE_REG, 0x3ff0);
+gfx_version = GfxVersion::gfx90a;
 } else {
 panic("Unknown GPU device %s\n", p.device_name);
 }
diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh
index cab7991..56ed2f4 100644
--- a/src/dev/amdgpu/amdgpu_device.hh
+++ b/src/dev/amdgpu/amdgpu_device.hh

[gem5-dev] [M] Change in gem5/gem5[develop]: configs,dev-amdgpu: GPUFS MI200/gfx90a support

2023-05-05 Thread Matthew Poremba (Gerrit) via gem5-dev
Matthew Poremba has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/70317?usp=email )



Change subject: configs,dev-amdgpu: GPUFS MI200/gfx90a support
..

configs,dev-amdgpu: GPUFS MI200/gfx90a support

Add support for an MI200-like device. This includes adding PCI IDs and new
MMIOs for the device, a different MAP_PROCESS packet, and a different
calculation for the number of VGPRs.

Change-Id: I0fb7b3ad928826beaa5386d52a94ba504369cb0d
---
M configs/example/gpufs/runfs.py
M configs/example/gpufs/system/amdgpu.py
M configs/example/gpufs/system/system.py
M src/dev/amdgpu/amdgpu_device.cc
M src/dev/amdgpu/amdgpu_device.hh
M src/dev/amdgpu/amdgpu_nbio.cc
M src/dev/amdgpu/amdgpu_nbio.hh
M src/dev/amdgpu/amdgpu_vm.hh
M src/dev/amdgpu/pm4_defines.hh
M src/dev/amdgpu/pm4_packet_processor.cc
M src/dev/amdgpu/pm4_packet_processor.hh
M src/gpu-compute/GPU.py
M src/gpu-compute/gpu_command_processor.cc
M src/gpu-compute/hsa_queue_entry.hh
14 files changed, 173 insertions(+), 27 deletions(-)



diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py
index 4c90601..f8ef70d 100644
--- a/configs/example/gpufs/runfs.py
+++ b/configs/example/gpufs/runfs.py
@@ -132,8 +132,9 @@
 parser.add_argument(
 "--gpu-device",
 default="Vega10",
-choices=["Vega10", "MI100"],
-help="GPU model to run: Vega10 (gfx900) or MI100 (gfx908)",
+choices=["Vega10", "MI100", "MI200"],
+help="GPU model to run: Vega10 (gfx900), MI100 (gfx908), or "
+"MI200 (gfx90a)",
 )


diff --git a/configs/example/gpufs/system/amdgpu.py b/configs/example/gpufs/system/amdgpu.py
index 5f98b55..9697e50 100644
--- a/configs/example/gpufs/system/amdgpu.py
+++ b/configs/example/gpufs/system/amdgpu.py
@@ -177,6 +177,10 @@
 system.pc.south_bridge.gpu.DeviceID = 0x738C
 system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002
 system.pc.south_bridge.gpu.SubsystemID = 0x0C34
+elif args.gpu_device == "MI200":
+system.pc.south_bridge.gpu.DeviceID = 0x740F
+system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002
+system.pc.south_bridge.gpu.SubsystemID = 0x0C34
 elif args.gpu_device == "Vega10":
 system.pc.south_bridge.gpu.DeviceID = 0x6863
 else:
diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py
index 90c5c01..263ffc0 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -152,6 +152,16 @@
 0x7D000,
 ]
 sdma_sizes = [0x1000] * 8
+elif args.gpu_device == "MI200":
+num_sdmas = 5
+sdma_bases = [
+0x4980,
+0x6180,
+0x78000,
+0x79000,
+0x7A000,
+]
+sdma_sizes = [0x1000] * 5
 else:
 m5.util.panic(f"Unknown GPU device {args.gpu_device}")

diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index f58d1f7..734f0d7 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -115,7 +115,7 @@
 sdmaFunc.insert({0x10b, ::setPageDoorbellOffsetLo});
 sdmaFunc.insert({0xe0, ::setPageSize});
 sdmaFunc.insert({0x113, ::setPageWptrLo});
-} else if (p.device_name == "MI100") {
+} else if (p.device_name == "MI100" || p.device_name == "MI200") {
 sdmaFunc.insert({0xd9, ::setPageBaseLo});
 sdmaFunc.insert({0xe1, ::setPageRptrLo});
 sdmaFunc.insert({0xe0, ::setPageRptrHi});
@@ -144,10 +144,19 @@
 if (p.device_name == "Vega10") {
 setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24);
 setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24);
+gfx_version = GfxVersion::gfx900;
 } else if (p.device_name == "MI100") {
 setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24);
 setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24);
 setRegVal(MI100_MEM_SIZE_REG, 0x3ff0); // 16GB of memory
+gfx_version = GfxVersion::gfx908;
+} else if (p.device_name == "MI200") {
+// This device can have either 64GB or 128GB of device memory.
+// This limits to 16GB for simulation.
+setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
+setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
+setRegVal(MI200_MEM_SIZE_REG, 0x3ff0);
+gfx_version = GfxVersion::gfx90a;
 } else {
 panic("Unknown GPU device %s\n", p.device_name);
 }
diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh
index cab7991..56ed2f4 100644
--- a/src/dev/amdgpu/amdgpu_device.hh
+++ b/src/dev/amdgpu/amdgpu_device.hh
@@ -42,6 +42,7 @@
 #include "dev/amdgpu/mmio_reader.hh"
 #include "dev/io_device.hh"
 #include "dev/pci/device.hh"
+#include "enums/GfxVersion.hh"
 #include "params/AMDGPUDevice.hh"

 namespace gem5
@@ -145,6 +146,9 @@
  */