Kyle Roarty has submitted this change.
( https://gem5-review.googlesource.com/c/public/gem5/+/46246 )
Change subject: dev-hsa,gpu-compute: IOCTL updates for ROCm 4
......................................................................
dev-hsa,gpu-compute: IOCTL updates for ROCm 4
This change copies over the up-to-date kfd_ioctl.h file from the Linux
kernel, and updates the gpu_compute_driver to reflect the changes found
in the new version of the kfd_ioctl.h file.
Change-Id: I51e8e7158762f4b7e06c0f84507e5889a17939a2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/46246
Reviewed-by: Matt Sinclair <mattdsincl...@gmail.com>
Maintainer: Matt Sinclair <mattdsincl...@gmail.com>
Tested-by: kokoro <noreply+kok...@google.com>
---
M src/dev/hsa/kfd_ioctl.h
M src/gpu-compute/gpu_compute_driver.cc
2 files changed, 310 insertions(+), 275 deletions(-)
Approvals:
Matt Sinclair: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
diff --git a/src/dev/hsa/kfd_ioctl.h b/src/dev/hsa/kfd_ioctl.h
index 504621c..7099851 100644
--- a/src/dev/hsa/kfd_ioctl.h
+++ b/src/dev/hsa/kfd_ioctl.h
@@ -23,13 +23,16 @@
#ifndef KFD_IOCTL_H_INCLUDED
#define KFD_IOCTL_H_INCLUDED
+#include <drm/drm.h>
#include <linux/ioctl.h>
#include <linux/types.h>
-#include <cstdint>
-
+/*
+ * - 1.1 - initial version
+ * - 1.3 - Add SMI events support
+ */
#define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 2
+#define KFD_IOCTL_MINOR_VERSION 3
struct kfd_ioctl_get_version_args
{
@@ -41,6 +44,7 @@
#define KFD_IOC_QUEUE_TYPE_COMPUTE 0
#define KFD_IOC_QUEUE_TYPE_SDMA 1
#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 2
+#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 3
#define KFD_MAX_QUEUE_PERCENTAGE 100
#define KFD_MAX_QUEUE_PRIORITY 15
@@ -89,6 +93,15 @@
uint64_t cu_mask_ptr; /* to KFD */
};
+struct kfd_ioctl_get_queue_wave_state_args
+{
+ uint64_t ctl_stack_address; /* to KFD */
+ uint32_t ctl_stack_used_size; /* from KFD */
+ uint32_t save_area_used_size; /* from KFD */
+ uint32_t queue_id; /* to KFD */
+ uint32_t pad;
+};
+
/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
#define KFD_IOC_CACHE_POLICY_COHERENT 0
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
@@ -104,14 +117,6 @@
uint32_t pad;
};
-struct kfd_ioctl_set_trap_handler_args
-{
- uint64_t tba_addr;
- uint64_t tma_addr;
- uint32_t gpu_id; /* to KFD */
- uint32_t pad;
-};
-
/*
* All counters are monotonic. They are used for profiling of compute jobs.
* The profiling is done by userspace.
@@ -130,8 +135,6 @@
uint32_t pad;
};
-#define NUM_OF_SUPPORTED_GPUS 7
-
struct kfd_process_device_apertures
{
uint64_t lds_base; /* from KFD */
@@ -144,10 +147,12 @@
uint32_t pad;
};
-/* This IOCTL and the limited NUM_OF_SUPPORTED_GPUS is deprecated. Use
- * kfd_ioctl_get_process_apertures_new instead, which supports
- * arbitrary numbers of GPUs.
+/*
+ * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use
+ * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an
+ * unlimited number of GPUs.
*/
+#define NUM_OF_SUPPORTED_GPUS 7
struct kfd_ioctl_get_process_apertures_args
{
struct kfd_process_device_apertures
@@ -217,14 +222,21 @@
#define KFD_IOC_WAIT_RESULT_TIMEOUT 1
#define KFD_IOC_WAIT_RESULT_FAIL 2
-/*
- * The added 512 is because, currently, 8*(4096/256) signal events are
- * reserved for debugger events, and we want to provide at least 4K signal
- * events for EOP usage.
- * We add 512 to make the allocated size (KFD_SIGNAL_EVENT_LIMIT * 8) be
- * page aligned.
- */
-#define KFD_SIGNAL_EVENT_LIMIT (4096 + 512)
+#define KFD_SIGNAL_EVENT_LIMIT 4096
+
+/* For kfd_event_data.hw_exception_data.reset_type. */
+#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET 0
+#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1
+
+/* For kfd_event_data.hw_exception_data.reset_cause. */
+#define KFD_HW_EXCEPTION_GPU_HANG 0
+#define KFD_HW_EXCEPTION_ECC 1
+
+/* For kfd_hsa_memory_exception_data.ErrorType */
+#define KFD_MEM_ERR_NO_RAS 0
+#define KFD_MEM_ERR_SRAM_ECC 1
+#define KFD_MEM_ERR_POISON_CONSUMED 2
+#define KFD_MEM_ERR_GPU_HANG 3
struct kfd_ioctl_create_event_args
{
@@ -267,22 +279,38 @@
/* memory exception data */
struct kfd_hsa_memory_exception_data
{
- struct kfd_memory_exception_failure failure;
- uint64_t va;
- uint32_t gpu_id;
- uint32_t pad;
+ struct kfd_memory_exception_failure failure;
+ uint64_t va;
+ uint32_t gpu_id;
+ uint32_t ErrorType; /* 0 = no RAS error,
+ * 1 = ECC_SRAM,
+ * 2 = Link_SYNFLOOD (poison),
+ * 3 = GPU hang(not attributable to a specific cause),
+ * other values reserved
+ */
+};
+
+/* hw exception data */
+struct kfd_hsa_hw_exception_data
+{
+ uint32_t reset_type;
+ uint32_t reset_cause;
+ uint32_t memory_lost;
+ uint32_t gpu_id;
};
/* Event data */
struct kfd_event_data
{
- union {
- struct kfd_hsa_memory_exception_data memory_exception_data;
- }; /* From KFD */
- uint64_t kfd_event_data_ext; /* pointer to an extension structure
- for future exception types */
- uint32_t event_id; /* to KFD */
- uint32_t pad;
+ union
+ {
+ struct kfd_hsa_memory_exception_data memory_exception_data;
+ struct kfd_hsa_hw_exception_data hw_exception_data;
+ }; /* From KFD */
+ uint64_t kfd_event_data_ext; /* pointer to an extension structure
+ for future exception types */
+ uint32_t event_id; /* to KFD */
+ uint32_t pad;
};
struct kfd_ioctl_wait_events_args
@@ -295,12 +323,49 @@
uint32_t wait_result; /* from KFD */
};
-struct kfd_ioctl_alloc_memory_of_scratch_args
+struct kfd_ioctl_set_scratch_backing_va_args
{
- uint64_t va_addr; /* to KFD */
- uint64_t size; /* to KFD */
- uint32_t gpu_id; /* to KFD */
- uint32_t pad;
+ uint64_t va_addr; /* to KFD */
+ uint32_t gpu_id; /* to KFD */
+ uint32_t pad;
+};
+
+struct kfd_ioctl_get_tile_config_args
+{
+ /* to KFD: pointer to tile array */
+ uint64_t tile_config_ptr;
+ /* to KFD: pointer to macro tile array */
+ uint64_t macro_tile_config_ptr;
+ /* to KFD: array size allocated by user mode
+ * from KFD: array size filled by kernel
+ */
+ uint32_t num_tile_configs;
+ /* to KFD: array size allocated by user mode
+ * from KFD: array size filled by kernel
+ */
+ uint32_t num_macro_tile_configs;
+
+ uint32_t gpu_id; /* to KFD */
+ uint32_t gb_addr_config; /* from KFD */
+ uint32_t num_banks; /* from KFD */
+ uint32_t num_ranks; /* from KFD */
+ /* struct size can be extended later if needed
+ * without breaking ABI compatibility
+ */
+};
+
+struct kfd_ioctl_set_trap_handler_args
+{
+ uint64_t tba_addr; /* to KFD */
+ uint64_t tma_addr; /* to KFD */
+ uint32_t gpu_id; /* to KFD */
+ uint32_t pad;
+};
+
+struct kfd_ioctl_acquire_vm_args
+{
+ uint32_t drm_fd; /* to KFD */
+ uint32_t gpu_id; /* to KFD */
};
/* Allocation flags: memory types */
@@ -308,15 +373,27 @@
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1)
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2)
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
+#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4)
/* Allocation flags: attributes/access options */
-#define KFD_IOC_ALLOC_MEM_FLAGS_NONPAGED (1 << 31)
-#define KFD_IOC_ALLOC_MEM_FLAGS_READONLY (1 << 30)
+#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
+#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29)
#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28)
#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
-#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTE_ACCESS (1 << 26)
-#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 25)
+#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26)
+/* Allocate memory for later SVM (shared virtual memory) mapping.
+ *
+ * @va_addr: virtual address of the memory to be allocated
+ * all later mappings on all GPUs will use this address
+ * @size: size in bytes
+ * @handle: buffer handle returned to user mode, used to refer to
+ * this allocation for mapping, unmapping and freeing
+ * @mmap_offset: for CPU-mapping the allocation by mmapping a render node
+ * for userptrs this is overloaded to specify the CPU address
+ * @gpu_id: device identifier
+ * @flags: memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above
+ */
struct kfd_ioctl_alloc_memory_of_gpu_args
{
uint64_t va_addr; /* to KFD */
@@ -327,48 +404,63 @@
uint32_t flags;
};
+/* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu
+ *
+ * @handle: memory handle returned by alloc
+ */
struct kfd_ioctl_free_memory_of_gpu_args
{
uint64_t handle; /* to KFD */
};
+/* Map memory to one or more GPUs
+ *
+ * @handle: memory handle returned by alloc
+ * @device_ids_array_ptr: array of gpu_ids (uint32_t per device)
+ * @n_devices: number of devices in the array
+ * @n_success: number of devices mapped successfully
+ *
+ * @n_success returns information to the caller how many devices from
+ * the start of the array have mapped the buffer successfully. It can
+ * be passed into a subsequent retry call to skip those devices. For
+ * the first call the caller should initialize it to 0.
+ *
+ * If the ioctl completes with return code 0 (success), n_success ==
+ * n_devices.
+ */
struct kfd_ioctl_map_memory_to_gpu_args
{
- uint64_t handle; /* to KFD */
- uint64_t device_ids_array_ptr; /* to KFD */
- uint32_t device_ids_array_size; /* to KFD */
- uint32_t pad;
+ uint64_t handle; /* to KFD */
+ uint64_t device_ids_array_ptr; /* to KFD */
+ uint32_t n_devices; /* to KFD */
+ uint32_t n_success; /* to/from KFD */
};
+/* Unmap memory from one or more GPUs
+ *
+ * same arguments as for mapping
+ */
struct kfd_ioctl_unmap_memory_from_gpu_args
{
- uint64_t handle; /* to KFD */
- uint64_t device_ids_array_ptr; /* to KFD */
- uint32_t device_ids_array_size; /* to KFD */
- uint32_t pad;
+ uint64_t handle; /* to KFD */
+ uint64_t device_ids_array_ptr; /* to KFD */
+ uint32_t n_devices; /* to KFD */
+ uint32_t n_success; /* to/from KFD */
};
-/* TODO: remove this. It's only implemented for Kaveri and was never
- * upstreamed. There are no open-source users of this interface. It
- * has been superseded by the pair of get_dmabuf_info and
- * import_dmabuf, which is implemented for all supported GPUs.
+/* Allocate GWS for specific queue
+ *
+ * @queue_id: queue's id that GWS is allocated for
+ * @num_gws: how many GWS to allocate
+ * @first_gws: index of the first GWS allocated.
+ * only support contiguous GWS allocation
*/
-struct kfd_ioctl_open_graphic_handle_args
+struct kfd_ioctl_alloc_queue_gws_args
{
- uint64_t va_addr; /* to KFD */
- uint64_t handle; /* from KFD */
- uint32_t gpu_id; /* to KFD */
- int graphic_device_fd; /* to KFD */
- uint32_t graphic_handle; /* to KFD */
- uint32_t pad;
-};
-
-struct kfd_ioctl_set_process_dgpu_aperture_args
-{
- uint64_t dgpu_base;
- uint64_t dgpu_limit;
- uint32_t gpu_id;
- uint32_t pad;
+ uint32_t queue_id; /* to KFD */
+ uint32_t num_gws; /* to KFD */
+ uint32_t first_gws; /* from KFD */
+ uint32_t pad;
};
struct kfd_ioctl_get_dmabuf_info_args
@@ -390,79 +482,32 @@
uint32_t dmabuf_fd; /* to KFD */
};
-struct kfd_ioctl_ipc_export_handle_args
-{
- uint64_t handle; /* to KFD */
- uint32_t share_handle[4]; /* from KFD */
- uint32_t gpu_id; /* to KFD */
- uint32_t pad;
-};
-
-struct kfd_ioctl_ipc_import_handle_args
-{
- uint64_t handle; /* from KFD */
- uint64_t va_addr; /* to KFD */
- uint64_t mmap_offset; /* from KFD */
- uint32_t share_handle[4]; /* to KFD */
- uint32_t gpu_id; /* to KFD */
- uint32_t pad;
-};
-
-struct kfd_ioctl_get_tile_config_args
-{
- /* to KFD: pointer to tile array */
- uint64_t tile_config_ptr;
- /* to KFD: pointer to macro tile array */
- uint64_t macro_tile_config_ptr;
- /* to KFD: array size allocated by user mode
- * from KFD: array size filled by kernel
- */
- uint32_t num_tile_configs;
- /* to KFD: array size allocated by user mode
- * from KFD: array size filled by kernel
- */
- uint32_t num_macro_tile_configs;
-
- uint32_t gpu_id; /* to KFD */
- uint32_t gb_addr_config; /* from KFD */
- uint32_t num_banks; /* from KFD */
- uint32_t num_ranks; /* from KFD */
- /* struct size can be extended later if needed
- * without breaking ABI compatibility
- */
-};
-
-struct kfd_memory_range
-{
- uint64_t va_addr;
- uint64_t size;
-};
-
-/* flags definitions
- * BIT0: 0: read operation, 1: write operation.
- * This also identifies if the src or dst array belongs to remote process
+/*
+ * KFD SMI(System Management Interface) events
*/
-#define KFD_CROSS_MEMORY_RW_BIT (1 << 0)
-#define KFD_SET_CROSS_MEMORY_READ(flags) (flags &= ~KFD_CROSS_MEMORY_RW_BIT)
-#define KFD_SET_CROSS_MEMORY_WRITE(flags) (flags |= KFD_CROSS_MEMORY_RW_BIT)
-#define KFD_IS_CROSS_MEMORY_WRITE(flags) (flags & KFD_CROSS_MEMORY_RW_BIT)
-
-struct kfd_ioctl_cross_memory_copy_args
+enum kfd_smi_event
{
- /* to KFD: Process ID of the remote process */
- uint32_t pid;
- /* to KFD: See above definition */
- uint32_t flags;
- /* to KFD: Source GPU VM range */
- uint64_t src_mem_range_array;
- /* to KFD: Size of above array */
- uint64_t src_mem_array_size;
- /* to KFD: Destination GPU VM range */
- uint64_t dst_mem_range_array;
- /* to KFD: Size of above array */
- uint64_t dst_mem_array_size;
- /* from KFD: Total amount of bytes copied */
- uint64_t bytes_copied;
+ KFD_SMI_EVENT_NONE = 0, /* not used */
+ KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
+ KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
+ KFD_SMI_EVENT_GPU_PRE_RESET = 3,
+ KFD_SMI_EVENT_GPU_POST_RESET = 4,
+};
+
+#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
+
+struct kfd_ioctl_smi_events_args
+{
+ uint32_t gpuid; /* to KFD */
+ uint32_t anon_fd; /* from KFD */
+};
+
+/* Register offset inside the remapped mmio page
+ */
+enum kfd_mmio_remap
+{
+ KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0,
+ KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,
};
#define AMDKFD_IOCTL_BASE 'K'
@@ -519,57 +564,53 @@
#define AMDKFD_IOC_DBG_WAVE_CONTROL \
AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
-#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \
- AMDKFD_IOWR(0x11, struct kfd_ioctl_alloc_memory_of_gpu_args)
+#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \
+ AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args)
-#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \
- AMDKFD_IOWR(0x12, struct kfd_ioctl_free_memory_of_gpu_args)
+#define AMDKFD_IOC_GET_TILE_CONFIG \
+ AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args)
-#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \
- AMDKFD_IOWR(0x13, struct kfd_ioctl_map_memory_to_gpu_args)
+#define AMDKFD_IOC_SET_TRAP_HANDLER \
+ AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
-#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \
- AMDKFD_IOWR(0x14, struct kfd_ioctl_unmap_memory_from_gpu_args)
+#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \
+ AMDKFD_IOWR(0x14, \
+ struct kfd_ioctl_get_process_apertures_new_args)
-#define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH \
- AMDKFD_IOWR(0x15, struct kfd_ioctl_alloc_memory_of_scratch_args)
+#define AMDKFD_IOC_ACQUIRE_VM \
+ AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args)
-#define AMDKFD_IOC_SET_CU_MASK \
- AMDKFD_IOW(0x16, struct kfd_ioctl_set_cu_mask_args)
+#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \
+ AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
-#define AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE \
- AMDKFD_IOW(0x17, \
- struct kfd_ioctl_set_process_dgpu_aperture_args)
+#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \
+ AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args)
-#define AMDKFD_IOC_SET_TRAP_HANDLER \
- AMDKFD_IOW(0x18, struct kfd_ioctl_set_trap_handler_args)
+#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \
+ AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args)
-#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \
- AMDKFD_IOWR(0x19, struct kfd_ioctl_get_process_apertures_new_args)
+#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \
+ AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
-#define AMDKFD_IOC_GET_DMABUF_INFO \
- AMDKFD_IOWR(0x1A, struct kfd_ioctl_get_dmabuf_info_args)
+#define AMDKFD_IOC_SET_CU_MASK \
+ AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args)
-#define AMDKFD_IOC_IMPORT_DMABUF \
- AMDKFD_IOWR(0x1B, struct kfd_ioctl_import_dmabuf_args)
+#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \
+ AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args)
-#define AMDKFD_IOC_GET_TILE_CONFIG \
- AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_tile_config_args)
+#define AMDKFD_IOC_GET_DMABUF_INFO \
+ AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args)
-#define AMDKFD_IOC_IPC_IMPORT_HANDLE \
- AMDKFD_IOWR(0x1D, struct kfd_ioctl_ipc_import_handle_args)
+#define AMDKFD_IOC_IMPORT_DMABUF \
+ AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
-#define AMDKFD_IOC_IPC_EXPORT_HANDLE \
- AMDKFD_IOWR(0x1E, struct kfd_ioctl_ipc_export_handle_args)
+#define AMDKFD_IOC_ALLOC_QUEUE_GWS \
+ AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
-#define AMDKFD_IOC_CROSS_MEMORY_COPY \
- AMDKFD_IOWR(0x1F, struct kfd_ioctl_cross_memory_copy_args)
-
-/* TODO: remove this */
-#define AMDKFD_IOC_OPEN_GRAPHIC_HANDLE \
- AMDKFD_IOWR(0x20, struct kfd_ioctl_open_graphic_handle_args)
+#define AMDKFD_IOC_SMI_EVENTS \
+ AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args)
#define AMDKFD_COMMAND_START 0x01
-#define AMDKFD_COMMAND_END 0x21
+#define AMDKFD_COMMAND_END 0x20
#endif
diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc
index d23aede..7f8cc16 100644
--- a/src/gpu-compute/gpu_compute_driver.cc
+++ b/src/gpu-compute/gpu_compute_driver.cc
@@ -559,7 +559,91 @@
warn("unimplemented ioctl: AMDKFD_IOC_DBG_WAVE_CONTROL\n");
}
break;
- /**
+ case AMDKFD_IOC_SET_SCRATCH_BACKING_VA:
+ {
+ warn("unimplemented ioctl: AMDKFD_IOC_SET_SCRATCH_BACKING_VA\n");
+ }
+ break;
+ case AMDKFD_IOC_GET_TILE_CONFIG:
+ {
+ warn("unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
+ }
+ break;
+ case AMDKFD_IOC_SET_TRAP_HANDLER:
+ {
+ warn("unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
+ }
+ break;
+ case AMDKFD_IOC_GET_PROCESS_APERTURES_NEW:
+ {
+ DPRINTF(GPUDriver,
+ "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
+
+ TypedBufferArg<kfd_ioctl_get_process_apertures_new_args>
+ ioc_args(ioc_buf);
+
+ ioc_args.copyIn(virt_proxy);
+ ioc_args->num_of_nodes = 1;
+
+ for (int i = 0; i < ioc_args->num_of_nodes; ++i) {
+ TypedBufferArg<kfd_process_device_apertures> ape_args
+ (ioc_args->kfd_process_device_apertures_ptr);
+
+ ape_args->scratch_base = scratchApeBase(i + 1);
+ ape_args->scratch_limit =
+ scratchApeLimit(ape_args->scratch_base);
+ ape_args->lds_base = ldsApeBase(i + 1);
+ ape_args->lds_limit = ldsApeLimit(ape_args->lds_base);
+ ape_args->gpuvm_base = gpuVmApeBase(i + 1);
+ ape_args->gpuvm_limit = gpuVmApeLimit(ape_args->gpuvm_base);
+
+ // NOTE: Must match ID populated by hsaTopology.py
+ if (isdGPU) {
+ switch (gfxVersion) {
+ case GfxVersion::gfx803:
+ ape_args->gpu_id = 50156;
+ break;
+ case GfxVersion::gfx900:
+ ape_args->gpu_id = 22124;
+ break;
+ default:
+ fatal("Invalid gfx version for dGPU\n");
+ }
+ } else {
+ switch (gfxVersion) {
+ case GfxVersion::gfx801:
+ ape_args->gpu_id = 2765;
+ break;
+ default:
+ fatal("Invalid gfx version for APU\n");
+ }
+ }
+
+ assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0x1ffff);
+ assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
+ assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0x1ffff);
+ assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0);
+ assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0x1ffff);
+ assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
+ assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
+ assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);
+ assert(bits<Addr>(ape_args->gpuvm_base, 63, 47) != 0x1ffff);
+ assert(bits<Addr>(ape_args->gpuvm_base, 63, 47) != 0);
+ assert(bits<Addr>(ape_args->gpuvm_limit, 63, 47) != 0x1ffff);
+ assert(bits<Addr>(ape_args->gpuvm_limit, 63, 47) != 0);
+
+ ape_args.copyOut(virt_proxy);
+ }
+
+ ioc_args.copyOut(virt_proxy);
+ }
+ break;
+ case AMDKFD_IOC_ACQUIRE_VM:
+ {
+ warn("unimplemented ioctl: AMDKFD_IOC_ACQUIRE_VM\n");
+ }
+ break;
+ /**
* In real hardware, this IOCTL maps host memory, dGPU memory, or dGPU
* doorbells into GPUVM space. Essentially, ROCm implements SVM by
* carving out a region of free VA space that both the host and GPUVM
@@ -740,89 +824,14 @@
warn("unimplemented ioctl: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
}
break;
- case AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH:
- {
- warn("unimplemented ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH\n");
- }
- break;
case AMDKFD_IOC_SET_CU_MASK:
{
warn("unimplemented ioctl: AMDKFD_IOC_SET_CU_MASK\n");
}
break;
- case AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE:
+ case AMDKFD_IOC_GET_QUEUE_WAVE_STATE:
{
- warn("unimplemented ioctl: AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE"
- "\n");
- }
- break;
- case AMDKFD_IOC_SET_TRAP_HANDLER:
- {
- warn("unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
- }
- break;
- case AMDKFD_IOC_GET_PROCESS_APERTURES_NEW:
- {
- DPRINTF(GPUDriver,
- "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
-
- TypedBufferArg<kfd_ioctl_get_process_apertures_new_args>
- ioc_args(ioc_buf);
-
- ioc_args.copyIn(virt_proxy);
- ioc_args->num_of_nodes = 1;
-
- for (int i = 0; i < ioc_args->num_of_nodes; ++i) {
- TypedBufferArg<kfd_process_device_apertures> ape_args
- (ioc_args->kfd_process_device_apertures_ptr);
-
- ape_args->scratch_base = scratchApeBase(i + 1);
- ape_args->scratch_limit =
- scratchApeLimit(ape_args->scratch_base);
- ape_args->lds_base = ldsApeBase(i + 1);
- ape_args->lds_limit = ldsApeLimit(ape_args->lds_base);
- ape_args->gpuvm_base = gpuVmApeBase(i + 1);
- ape_args->gpuvm_limit = gpuVmApeLimit(ape_args->gpuvm_base);
-
- // NOTE: Must match ID populated by hsaTopology.py
- if (isdGPU) {
- switch (gfxVersion) {
- case GfxVersion::gfx803:
- ape_args->gpu_id = 50156;
- break;
- case GfxVersion::gfx900:
- ape_args->gpu_id = 22124;
- break;
- default:
- fatal("Invalid gfx version for dGPU\n");
- }
- } else {
- switch (gfxVersion) {
- case GfxVersion::gfx801:
- ape_args->gpu_id = 2765;
- break;
- default:
- fatal("Invalid gfx version for APU\n");
- }
- }
-
- assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0x1ffff);
- assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
- assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0x1ffff);
- assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0);
- assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0x1ffff);
- assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
- assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
- assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);
- assert(bits<Addr>(ape_args->gpuvm_base, 63, 47) != 0x1ffff);
- assert(bits<Addr>(ape_args->gpuvm_base, 63, 47) != 0);
- assert(bits<Addr>(ape_args->gpuvm_limit, 63, 47) != 0x1ffff);
- assert(bits<Addr>(ape_args->gpuvm_limit, 63, 47) != 0);
-
- ape_args.copyOut(virt_proxy);
- }
-
- ioc_args.copyOut(virt_proxy);
+ warn("unimplemented ioctl: AMDKFD_IOC_GET_QUEUE_WAVE_STATE\n");
}
break;
case AMDKFD_IOC_GET_DMABUF_INFO:
@@ -835,29 +844,14 @@
warn("unimplemented ioctl: AMDKFD_IOC_IMPORT_DMABUF\n");
}
break;
- case AMDKFD_IOC_GET_TILE_CONFIG:
+ case AMDKFD_IOC_ALLOC_QUEUE_GWS:
{
- warn("unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
+ warn("unimplemented ioctl: AMDKFD_IOC_ALLOC_QUEUE_GWS\n");
}
break;
- case AMDKFD_IOC_IPC_IMPORT_HANDLE:
+ case AMDKFD_IOC_SMI_EVENTS:
{
- warn("unimplemented ioctl: AMDKFD_IOC_IPC_IMPORT_HANDLE\n");
- }
- break;
- case AMDKFD_IOC_IPC_EXPORT_HANDLE:
- {
- warn("unimplemented ioctl: AMDKFD_IOC_IPC_EXPORT_HANDLE\n");
- }
- break;
- case AMDKFD_IOC_CROSS_MEMORY_COPY:
- {
- warn("unimplemented ioctl: AMDKFD_IOC_CROSS_MEMORY_COPY\n");
- }
- break;
- case AMDKFD_IOC_OPEN_GRAPHIC_HANDLE:
- {
- warn("unimplemented ioctl: AMDKFD_IOC_OPEN_GRAPHIC_HANDLE\n");
+ warn("unimplemented ioctl: AMDKFD_IOC_SMI_EVENTS\n");
}
break;
default:
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46246
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I51e8e7158762f4b7e06c0f84507e5889a17939a2
Gerrit-Change-Number: 46246
Gerrit-PatchSet: 8
Gerrit-Owner: Kyle Roarty <kyleroarty1...@gmail.com>
Gerrit-Reviewer: Alex Dutu <alexandru.d...@amd.com>
Gerrit-Reviewer: Kyle Roarty <kyleroarty1...@gmail.com>
Gerrit-Reviewer: Matt Sinclair <mattdsincl...@gmail.com>
Gerrit-Reviewer: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s