Matthew Poremba has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/42217 )
Change subject: configs, gpu-compute: Add option to specify gfx version
......................................................................
configs, gpu-compute: Add option to specify gfx version
Currently uses gfx801, gfx803, gfx900 for Carrizo, Fiji,
and Vega respectively
Change-Id: I62758914b6a60f16dd4f2141a23c0a9141a4e1a0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42217
Maintainer: Matthew Poremba <[email protected]>
Maintainer: Matt Sinclair <[email protected]>
Reviewed-by: Matt Sinclair <[email protected]>
Tested-by: kokoro <[email protected]>
---
M configs/example/apu_se.py
M configs/example/hsaTopology.py
M src/gpu-compute/GPU.py
M src/gpu-compute/gpu_compute_driver.cc
M src/gpu-compute/gpu_compute_driver.hh
5 files changed, 248 insertions(+), 10 deletions(-)
Approvals:
Matt Sinclair: Looks good to me, approved; Looks good to me, approved
Matthew Poremba: Looks good to me, approved
kokoro: Regressions pass
diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py
index da49efa..b0da8df 100644
--- a/configs/example/apu_se.py
+++ b/configs/example/apu_se.py
@@ -188,6 +188,9 @@
"the driver on a per-page basis. Valid values are "
"between 0-7")
+parser.add_argument("--gfx-version", type=str, default='gfx801',
+ help="Gfx version for gpu: gfx801, gfx803, gfx900")
+
Ruby.define_options(parser)
# add TLB options to the parser
@@ -430,6 +433,7 @@
# HSA kernel mode driver
gpu_driver = GPUComputeDriver(filename = "kfd", isdGPU = args.dgpu,
+ gfxVersion = args.gfx_version,
dGPUPoolID = 1, m_type = args.m_type)
# Creating the GPU kernel launching components: that is the HSA
@@ -667,8 +671,15 @@
# Create the /sys/devices filesystem for the simulator so that the HSA
Runtime
# knows what type of GPU hardware we are simulating
if args.dgpu:
- hsaTopology.createFijiTopology(args)
+ assert (args.gfx_version in ['gfx803', 'gfx900']),\
+ "Incorrect gfx version for dGPU"
+ if args.gfx_version == 'gfx803':
+ hsaTopology.createFijiTopology(args)
+ elif args.gfx_version == 'gfx900':
+ hsaTopology.createVegaTopology(args)
else:
+ assert (args.gfx_version in ['gfx801']),\
+ "Incorrect gfx version for APU"
hsaTopology.createCarrizoTopology(args)
m5.ticks.setGlobalFrequency('1THz')
diff --git a/configs/example/hsaTopology.py b/configs/example/hsaTopology.py
index a5e0d44..51585de 100644
--- a/configs/example/hsaTopology.py
+++ b/configs/example/hsaTopology.py
@@ -49,6 +49,183 @@
rmtree(path)
makedirs(path)
+# This fakes out a dGPU setup so the runtime operates correctly. The
spoofed
+# system has a single dGPU and a single socket CPU. Note that more complex
+# topologies (multi-GPU, multi-socket CPUs) need to have a different setup
+# here or the runtime won't be able to issue Memcpies from one node to
another.
+#
+# TODO: There is way too much hardcoded here. It doesn't affect anything
in
+# our current ROCm stack (1.6), but it is highly possible that it will in
the
+# future. We might need to scrub through this and extract the appropriate
+# fields from the simulator in the future.
+def createVegaTopology(options):
+ topology_dir = joinpath(m5.options.outdir, \
+ 'fs/sys/devices/virtual/kfd/kfd/topology')
+ remake_dir(topology_dir)
+
+ amdgpu_dir = joinpath(m5.options.outdir, \
+ 'fs/sys/module/amdgpu/parameters')
+ remake_dir(amdgpu_dir)
+
+ pci_ids_dir = joinpath(m5.options.outdir, \
+ 'fs/usr/share/hwdata/')
+ remake_dir(pci_ids_dir)
+
+ # Vega reported VM size in GB. Used to reserve an allocation from CPU
+ # to implement SVM (i.e. GPUVM64 pointers and X86 pointers agree)
+ file_append((amdgpu_dir, 'vm_size'), 256)
+
+ # Ripped from real Vega platform to appease KMT version checks
+ file_append((topology_dir, 'generation_id'), 2)
+
+    # Set up system properties. Register as an ast-rocm server
+ sys_prop = 'platform_oem 35498446626881\n' + \
+ 'platform_id 71791775140929\n' + \
+ 'platform_rev 2\n'
+ file_append((topology_dir, 'system_properties'), sys_prop)
+
+ # Populate the topology tree
+ # Our dGPU system is two nodes. Node 0 is a CPU and Node 1 is a dGPU
+ node_dir = joinpath(topology_dir, 'nodes/0')
+ remake_dir(node_dir)
+
+ # Register as a CPU
+ file_append((node_dir, 'gpu_id'), 0)
+ file_append((node_dir, 'name'), '')
+
+ # CPU links. Only thing that matters is we tell the runtime that GPU
is
+ # connected through PCIe to CPU socket 0.
+ io_links = 1
+ io_dir = joinpath(node_dir, 'io_links/0')
+ remake_dir(io_dir)
+ io_prop = 'type 2\n' + \
+ 'version_major 0\n' + \
+ 'version_minor 0\n' + \
+ 'node_from 0\n' + \
+ 'node_to 1\n' + \
+ 'weight 20\n' + \
+ 'min_latency 0\n' + \
+ 'max_latency 0\n' + \
+ 'min_bandwidth 0\n' + \
+ 'max_bandwidth 0\n' + \
+ 'recommended_transfer_size 0\n' + \
+ 'flags 13\n'
+ file_append((io_dir, 'properties'), io_prop)
+
+ # Populate CPU node properties
+ node_prop = 'cpu_cores_count %s\n' % options.num_cpus + \
+ 'simd_count 0\n' + \
+ 'mem_banks_count 1\n' + \
+ 'caches_count 0\n' + \
+ 'io_links_count %s\n' % io_links + \
+ 'cpu_core_id_base 0\n' + \
+ 'simd_id_base 0\n' + \
+ 'max_waves_per_simd 0\n' + \
+ 'lds_size_in_kb 0\n' + \
+ 'gds_size_in_kb 0\n' + \
+ 'wave_front_size 64\n' + \
+ 'array_count 0\n' + \
+ 'simd_arrays_per_engine 0\n' + \
+ 'cu_per_simd_array 0\n' + \
+ 'simd_per_cu 0\n' + \
+ 'max_slots_scratch_cu 0\n' + \
+ 'vendor_id 0\n' + \
+ 'device_id 0\n' + \
+ 'location_id 0\n' + \
+ 'drm_render_minor 0\n' + \
+ 'max_engine_clk_ccompute 3400\n'
+
+ file_append((node_dir, 'properties'), node_prop)
+
+ # CPU memory reporting
+ mem_dir = joinpath(node_dir, 'mem_banks/0')
+ remake_dir(mem_dir)
+ mem_prop = 'heap_type %s\n' % HsaHeaptype.HSA_HEAPTYPE_SYSTEM.value + \
+ 'size_in_bytes 33704329216\n' + \
+ 'flags 0\n' + \
+ 'width 72\n' + \
+ 'mem_clk_max 2400\n'
+
+ file_append((mem_dir, 'properties'), mem_prop)
+
+ # Build the GPU node
+ node_dir = joinpath(topology_dir, 'nodes/1')
+ remake_dir(node_dir)
+
+ # Register as a Vega
+ file_append((node_dir, 'gpu_id'), 22124)
+ file_append((node_dir, 'name'), 'Vega\n')
+
+ # 96 in real Vega
+ # Random comment for comparison purposes
+ caches = 0
+
+ # GPU links. Only thing that matters is we tell the runtime that GPU
is
+ # connected through PCIe to CPU socket 0.
+ io_links = 1
+ io_dir = joinpath(node_dir, 'io_links/0')
+ remake_dir(io_dir)
+ io_prop = 'type 2\n' + \
+ 'version_major 0\n' + \
+ 'version_minor 0\n' + \
+ 'node_from 1\n' + \
+ 'node_to 0\n' + \
+ 'weight 20\n' + \
+ 'min_latency 0\n' + \
+ 'max_latency 0\n' + \
+ 'min_bandwidth 0\n' + \
+ 'max_bandwidth 0\n' + \
+ 'recommended_transfer_size 0\n' + \
+ 'flags 1\n'
+ file_append((io_dir, 'properties'), io_prop)
+
+ # Populate GPU node properties
+ cu_scratch = options.simds_per_cu * options.wfs_per_simd
+ node_prop = 'cpu_cores_count 0\n' + \
+ 'simd_count 256\n' + \
+ 'mem_banks_count 1\n' + \
+ 'caches_count %s\n' % caches + \
+ 'io_links_count %s\n' % io_links + \
+ 'cpu_core_id_base 0\n' + \
+ 'simd_id_base 2147487744\n' + \
+ 'max_waves_per_simd 10\n' + \
+ 'lds_size_in_kb 64\n' + \
+ 'gds_size_in_kb 0\n' + \
+ 'wave_front_size 64\n' + \
+ 'array_count 4\n' + \
+ 'simd_arrays_per_engine 1\n' + \
+ 'cu_per_simd_array 16\n' + \
+ 'simd_per_cu 4\n' + \
+ 'max_slots_scratch_cu %s\n' % cu_scratch + \
+ 'vendor_id 4098\n' + \
+ 'device_id 26720\n' + \
+ 'location_id 1024\n' + \
+ 'drm_render_minor 128\n' + \
+ 'hive_id 0\n' + \
+ 'num_sdma_engines 2\n' + \
+ 'num_sdma_xgmi_engines 0\n' + \
+ 'max_engine_clk_fcompute 1500\n' + \
+ 'local_mem_size 17163091968\n' + \
+ 'fw_version 421\n' + \
+ 'capability 238208\n' + \
+ 'debug_prop 32768\n' + \
+ 'sdma_fw_version 430\n' + \
+ 'max_engine_clk_ccompute 3400\n'
+
+ file_append((node_dir, 'properties'), node_prop)
+
+    # Vega HBM reporting
+    # TODO: Extract size, clk, and width from sim parameters
+ mem_dir = joinpath(node_dir, 'mem_banks/0')
+ remake_dir(mem_dir)
+    mem_prop = 'heap_type %s\n' % HsaHeaptype.HSA_HEAPTYPE_FB_PRIVATE.value + \
+ 'size_in_bytes 17163091968\n' + \
+ 'flags 0\n' + \
+ 'width 2048\n' + \
+ 'mem_clk_max 945\n'
+
+ file_append((mem_dir, 'properties'), mem_prop)
+
# This fakes out a dGPU setup so the runtime operates correctly. The
spoofed
# system has a single dGPU and a single socket CPU. Note that more complex
# topologies (multi-GPU, multi-socket CPUs) need to have a different setup
diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py
index 091fdde..53bece9 100644
--- a/src/gpu-compute/GPU.py
+++ b/src/gpu-compute/GPU.py
@@ -50,6 +50,12 @@
'PF_END',
]
+class GfxVersion(ScopedEnum): vals = [
+ 'gfx801',
+ 'gfx803',
+ 'gfx900',
+ ]
+
class PoolManager(SimObject):
type = 'PoolManager'
abstract = True
@@ -237,6 +243,7 @@
type = 'GPUComputeDriver'
cxx_header = 'gpu-compute/gpu_compute_driver.hh'
isdGPU = Param.Bool(False, 'Driver is for a dGPU')
+ gfxVersion = Param.GfxVersion('gfx801', 'ISA of gpu to model')
dGPUPoolID = Param.Int(False, 'Pool ID for dGPU.')
# Default Mtype for caches
#-- 1 1 1 C_RW_S (Cached-ReadWrite-Shared)
diff --git a/src/gpu-compute/gpu_compute_driver.cc
b/src/gpu-compute/gpu_compute_driver.cc
index 9873e29..c315bd8 100644
--- a/src/gpu-compute/gpu_compute_driver.cc
+++ b/src/gpu-compute/gpu_compute_driver.cc
@@ -47,7 +47,8 @@
#include "sim/syscall_emul_buf.hh"
GPUComputeDriver::GPUComputeDriver(const Params &p)
- : HSADriver(p), isdGPU(p.isdGPU)
+ : HSADriver(p), isdGPU(p.isdGPU), gfxVersion(p.gfxVersion),
+ dGPUPoolID(p.dGPUPoolID)
{
device->attachDriver(this);
DPRINTF(GPUDriver, "Constructing KFD: device\n");
@@ -175,10 +176,34 @@
gpuVmApeLimit(args->process_apertures[i].gpuvm_base);
// NOTE: Must match ID populated by hsaTopology.py
- if (isdGPU)
- args->process_apertures[i].gpu_id = 50156;
- else
- args->process_apertures[i].gpu_id = 2765;
+ //
+ // https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/
+ //
blob/6a986c0943e9acd8c4c0cf2a9d510ff42167b43f/include/uapi/
+ // linux/kfd_ioctl.h#L564
+ //
+ // The gpu_id is a device identifier used by the driver for
+ // ioctls that allocate arguments. Each device has an
unique
+ // id composed out of a non-zero base and an offset.
+ if (isdGPU) {
+ switch (gfxVersion) {
+ case GfxVersion::gfx803:
+ args->process_apertures[i].gpu_id = 50156;
+ break;
+ case GfxVersion::gfx900:
+ args->process_apertures[i].gpu_id = 22124;
+ break;
+ default:
+ fatal("Invalid gfx version for dGPU\n");
+ }
+ } else {
+ switch (gfxVersion) {
+ case GfxVersion::gfx801:
+ args->process_apertures[i].gpu_id = 2765;
+ break;
+ default:
+ fatal("Invalid gfx version for APU\n");
+ }
+ }
DPRINTF(GPUDriver, "GPUVM base for node[%i] = %#x\n", i,
args->process_apertures[i].gpuvm_base);
@@ -610,10 +635,26 @@
ape_args->gpuvm_limit =
gpuVmApeLimit(ape_args->gpuvm_base);
// NOTE: Must match ID populated by hsaTopology.py
- if (isdGPU)
- ape_args->gpu_id = 50156;
- else
- ape_args->gpu_id = 2765;
+ if (isdGPU) {
+ switch (gfxVersion) {
+ case GfxVersion::gfx803:
+ ape_args->gpu_id = 50156;
+ break;
+ case GfxVersion::gfx900:
+ ape_args->gpu_id = 22124;
+ break;
+ default:
+ fatal("Invalid gfx version for dGPU\n");
+ }
+ } else {
+ switch (gfxVersion) {
+ case GfxVersion::gfx801:
+ ape_args->gpu_id = 2765;
+ break;
+ default:
+ fatal("Invalid gfx version for APU\n");
+ }
+ }
assert(bits<Addr>(ape_args->scratch_base, 63, 47) !=
0x1ffff);
assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
diff --git a/src/gpu-compute/gpu_compute_driver.hh
b/src/gpu-compute/gpu_compute_driver.hh
index d0c388d..658d4c7 100644
--- a/src/gpu-compute/gpu_compute_driver.hh
+++ b/src/gpu-compute/gpu_compute_driver.hh
@@ -44,6 +44,7 @@
#include "base/addr_range_map.hh"
#include "dev/hsa/hsa_driver.hh"
+#include "enums/GfxVersion.hh"
#include "mem/request.hh"
struct GPUComputeDriverParams;
@@ -68,6 +69,7 @@
private:
bool isdGPU;
+ GfxVersion gfxVersion;
int dGPUPoolID;
/**
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/42217
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I62758914b6a60f16dd4f2141a23c0a9141a4e1a0
Gerrit-Change-Number: 42217
Gerrit-PatchSet: 21
Gerrit-Owner: Alex Dutu <[email protected]>
Gerrit-Reviewer: Jason Lowe-Power <[email protected]>
Gerrit-Reviewer: Matt Sinclair <[email protected]>
Gerrit-Reviewer: Matthew Poremba <[email protected]>
Gerrit-Reviewer: kokoro <[email protected]>
Gerrit-CC: Bobby R. Bruce <[email protected]>
Gerrit-CC: Jason Lowe-Power <[email protected]>
Gerrit-CC: Kyle Roarty <[email protected]>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s