RE: [PATCH 01/33] drm/amdkfd: add debug and runtime enable interface
[Public] > -Original Message- > From: Alex Deucher > Sent: Wednesday, May 31, 2023 2:15 PM > To: Kuehling, Felix > Cc: Kim, Jonathan ; amd- > g...@lists.freedesktop.org; dri-devel@lists.freedesktop.org; Huang, JinHuiEric > > Subject: Re: [PATCH 01/33] drm/amdkfd: add debug and runtime enable > interface > > Caution: This message originated from an External Source. Use proper > caution when opening attachments, clicking links, or responding. > > > On Tue, May 30, 2023 at 3:17 PM Felix Kuehling > wrote: > > > > Am 2023-05-25 um 13:27 schrieb Jonathan Kim: > > > Introduce the GPU debug operations interface. > > > > > > For ROCm-GDB to extend the GNU Debugger's ability to inspect the AMD > GPU > > > instruction set, provide the necessary interface to allow the debugger > > > to HW debug-mode set and query exceptions per HSA queue, process or > > > device. > > > > > > The runtime_enable interface coordinates exception handling with the > > > HSA runtime. > > > > > > Usage is available in the kern docs at uapi/linux/kfd_ioctl.h. > > > > > > v2: add num_xcc to device snapshot entry. > > > fixup missing EC_QUEUE_PACKET_RESERVED mask. > > > > > > Signed-off-by: Jonathan Kim > > > > Reviewed-by: Felix Kuehling > > Can you provide a link to the userspace which uses this? Hi Alex, Current WIP user space link is here -> https://github.com/ROCm-Developer-Tools/ROCdbgapi/tree/wip-dbgapi. This will eventually go to amd-master. 
Thanks, Jon > > Alex > > > > > > > > --- > > > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 48 ++ > > > include/uapi/linux/kfd_ioctl.h | 668 ++- > > > 2 files changed, 715 insertions(+), 1 deletion(-) > > > > > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > > index 88fe1f31739d..f4b50b74818e 100644 > > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > > @@ -2729,6 +2729,48 @@ static int kfd_ioctl_criu(struct file *filep, > > > struct > kfd_process *p, void *data) > > > return ret; > > > } > > > > > > +static int kfd_ioctl_runtime_enable(struct file *filep, struct > > > kfd_process > *p, void *data) > > > +{ > > > + return 0; > > > +} > > > + > > > +static int kfd_ioctl_set_debug_trap(struct file *filep, struct > > > kfd_process > *p, void *data) > > > +{ > > > + struct kfd_ioctl_dbg_trap_args *args = data; > > > + int r = 0; > > > + > > > + if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { > > > + pr_err("Debugging does not support sched_policy %i", > sched_policy); > > > + return -EINVAL; > > > + } > > > + > > > + switch (args->op) { > > > + case KFD_IOC_DBG_TRAP_ENABLE: > > > + case KFD_IOC_DBG_TRAP_DISABLE: > > > + case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT: > > > + case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED: > > > + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: > > > + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: > > > + case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES: > > > + case KFD_IOC_DBG_TRAP_RESUME_QUEUES: > > > + case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH: > > > + case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH: > > > + case KFD_IOC_DBG_TRAP_SET_FLAGS: > > > + case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: > > > + case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: > > > + case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: > > > + case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: > > > + pr_warn("Debugging not supported yet\n"); > > > + r = -EACCES; > > > + break; > > > + default: > > 
> + pr_err("Invalid option: %i\n", args->op); > > > + r = -EINVAL; > > > + } > > > + > > > + return r; > > > +} > > > + > > > #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ > > > [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ > > > .cmd_drv = 0, .name = #ioctl} > > > @@ -2841,6 +2883,12 @@ static const struct amdkfd_ioctl_desc > amdkfd_ioctls[] = { > > > > > >
Re: [PATCH 01/33] drm/amdkfd: add debug and runtime enable interface
On Tue, May 30, 2023 at 3:17 PM Felix Kuehling wrote: > > Am 2023-05-25 um 13:27 schrieb Jonathan Kim: > > Introduce the GPU debug operations interface. > > > > For ROCm-GDB to extend the GNU Debugger's ability to inspect the AMD GPU > > instruction set, provide the necessary interface to allow the debugger > > to HW debug-mode set and query exceptions per HSA queue, process or > > device. > > > > The runtime_enable interface coordinates exception handling with the > > HSA runtime. > > > > Usage is available in the kern docs at uapi/linux/kfd_ioctl.h. > > > > v2: add num_xcc to device snapshot entry. > > fixup missing EC_QUEUE_PACKET_RESERVED mask. > > > > Signed-off-by: Jonathan Kim > > Reviewed-by: Felix Kuehling Can you provide a link to the userspace which uses this? Alex > > > > --- > > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 48 ++ > > include/uapi/linux/kfd_ioctl.h | 668 ++- > > 2 files changed, 715 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > index 88fe1f31739d..f4b50b74818e 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > > @@ -2729,6 +2729,48 @@ static int kfd_ioctl_criu(struct file *filep, struct > > kfd_process *p, void *data) > > return ret; > > } > > > > +static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process > > *p, void *data) > > +{ > > + return 0; > > +} > > + > > +static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process > > *p, void *data) > > +{ > > + struct kfd_ioctl_dbg_trap_args *args = data; > > + int r = 0; > > + > > + if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { > > + pr_err("Debugging does not support sched_policy %i", > > sched_policy); > > + return -EINVAL; > > + } > > + > > + switch (args->op) { > > + case KFD_IOC_DBG_TRAP_ENABLE: > > + case KFD_IOC_DBG_TRAP_DISABLE: > > + case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT: > > + case 
KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED: > > + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: > > + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: > > + case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES: > > + case KFD_IOC_DBG_TRAP_RESUME_QUEUES: > > + case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH: > > + case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH: > > + case KFD_IOC_DBG_TRAP_SET_FLAGS: > > + case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: > > + case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: > > + case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: > > + case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: > > + pr_warn("Debugging not supported yet\n"); > > + r = -EACCES; > > + break; > > + default: > > + pr_err("Invalid option: %i\n", args->op); > > + r = -EINVAL; > > + } > > + > > + return r; > > +} > > + > > #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ > > [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ > > .cmd_drv = 0, .name = #ioctl} > > @@ -2841,6 +2883,12 @@ static const struct amdkfd_ioctl_desc > > amdkfd_ioctls[] = { > > > > AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF, > > kfd_ioctl_export_dmabuf, 0), > > + > > + AMDKFD_IOCTL_DEF(AMDKFD_IOC_RUNTIME_ENABLE, > > + kfd_ioctl_runtime_enable, 0), > > + > > + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP, > > + kfd_ioctl_set_debug_trap, 0), > > }; > > > > #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) > > diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h > > index 2a9671e1ddb5..dfe745ee427e 100644 > > --- a/include/uapi/linux/kfd_ioctl.h > > +++ b/include/uapi/linux/kfd_ioctl.h > > @@ -110,6 +110,32 @@ struct kfd_ioctl_get_available_memory_args { > > __u32 pad; > > }; > > > > +struct kfd_dbg_device_info_entry { > > + __u64 exception_status; > > + __u64 lds_base; > > + __u64 lds_limit; > > + __u64 scratch_base; > > + __u64 scratch_limit; > > + __u64 gpuvm_base; > > + __u64 gpuvm_limit; > > + __u32 gpu_id; > > + __u32 location_id; > > + __u32 vendor_id; > > + __u32 device_id; > > + __u32 revision_id; > > 
+ __u32 subsystem_vendor_id; > > + __u32 subsystem_device_id; > > + __u32 fw_version; > > + __u32 gfx_target_version; > > + __u32 simd_count; > > + __u32 max_waves_per_simd; > > + __u32 array_count; > > + __u32 simd_arrays_per_engine; > > + __u32 num_xcc; > > + __u32 capability; > > + __u32 debug_prop; > > +}; > > + > > /* For kfd_ioctl_set_memory_policy_args.default_policy and > > alternate_policy */ > > #define KFD_IOC_CACHE_POLICY_COHERENT 0 > > #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 > > @@ -775,6 +801,640 @@ struct
Re: [PATCH 01/33] drm/amdkfd: add debug and runtime enable interface
Am 2023-05-25 um 13:27 schrieb Jonathan Kim: Introduce the GPU debug operations interface. For ROCm-GDB to extend the GNU Debugger's ability to inspect the AMD GPU instruction set, provide the necessary interface to allow the debugger to HW debug-mode set and query exceptions per HSA queue, process or device. The runtime_enable interface coordinates exception handling with the HSA runtime. Usage is available in the kern docs at uapi/linux/kfd_ioctl.h. v2: add num_xcc to device snapshot entry. fixup missing EC_QUEUE_PACKET_RESERVED mask. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 48 ++ include/uapi/linux/kfd_ioctl.h | 668 ++- 2 files changed, 715 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 88fe1f31739d..f4b50b74818e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2729,6 +2729,48 @@ static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data) return ret; } +static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data) +{ + return 0; +} + +static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data) +{ + struct kfd_ioctl_dbg_trap_args *args = data; + int r = 0; + + if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { + pr_err("Debugging does not support sched_policy %i", sched_policy); + return -EINVAL; + } + + switch (args->op) { + case KFD_IOC_DBG_TRAP_ENABLE: + case KFD_IOC_DBG_TRAP_DISABLE: + case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT: + case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED: + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: + case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES: + case KFD_IOC_DBG_TRAP_RESUME_QUEUES: + case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH: + case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH: + case KFD_IOC_DBG_TRAP_SET_FLAGS: + case 
KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: + case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: + case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: + case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: + pr_warn("Debugging not supported yet\n"); + r = -EACCES; + break; + default: + pr_err("Invalid option: %i\n", args->op); + r = -EINVAL; + } + + return r; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -2841,6 +2883,12 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF, kfd_ioctl_export_dmabuf, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_RUNTIME_ENABLE, + kfd_ioctl_runtime_enable, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP, + kfd_ioctl_set_debug_trap, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 2a9671e1ddb5..dfe745ee427e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -110,6 +110,32 @@ struct kfd_ioctl_get_available_memory_args { __u32 pad; }; +struct kfd_dbg_device_info_entry { + __u64 exception_status; + __u64 lds_base; + __u64 lds_limit; + __u64 scratch_base; + __u64 scratch_limit; + __u64 gpuvm_base; + __u64 gpuvm_limit; + __u32 gpu_id; + __u32 location_id; + __u32 vendor_id; + __u32 device_id; + __u32 revision_id; + __u32 subsystem_vendor_id; + __u32 subsystem_device_id; + __u32 fw_version; + __u32 gfx_target_version; + __u32 simd_count; + __u32 max_waves_per_simd; + __u32 array_count; + __u32 simd_arrays_per_engine; + __u32 num_xcc; + __u32 capability; + __u32 debug_prop; +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 @@ -775,6 +801,640 @@ struct kfd_ioctl_set_xnack_mode_args { __s32 xnack_enabled; }; +/* Wave launch override modes */ +enum kfd_dbg_trap_override_mode { + 
KFD_DBG_TRAP_OVERRIDE_OR = 0, + KFD_DBG_TRAP_OVERRIDE_REPLACE = 1 +}; + +/* Wave launch overrides */ +enum kfd_dbg_trap_mask { + KFD_DBG_TRAP_MASK_FP_INVALID = 1, + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL = 2, + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO = 4, + KFD_DBG_TRAP_MASK_FP_OVERFLOW = 8, + KFD_DBG_TRAP_MASK_FP_UNDERFLOW = 16, +
[PATCH 01/33] drm/amdkfd: add debug and runtime enable interface
Introduce the GPU debug operations interface. For ROCm-GDB to extend the GNU Debugger's ability to inspect the AMD GPU instruction set, provide the necessary interface to allow the debugger to HW debug-mode set and query exceptions per HSA queue, process or device. The runtime_enable interface coordinates exception handling with the HSA runtime. Usage is available in the kern docs at uapi/linux/kfd_ioctl.h. v2: add num_xcc to device snapshot entry. fixup missing EC_QUEUE_PACKET_RESERVED mask. Signed-off-by: Jonathan Kim --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 48 ++ include/uapi/linux/kfd_ioctl.h | 668 ++- 2 files changed, 715 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 88fe1f31739d..f4b50b74818e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2729,6 +2729,48 @@ static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data) return ret; } +static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data) +{ + return 0; +} + +static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data) +{ + struct kfd_ioctl_dbg_trap_args *args = data; + int r = 0; + + if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { + pr_err("Debugging does not support sched_policy %i", sched_policy); + return -EINVAL; + } + + switch (args->op) { + case KFD_IOC_DBG_TRAP_ENABLE: + case KFD_IOC_DBG_TRAP_DISABLE: + case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT: + case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED: + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: + case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES: + case KFD_IOC_DBG_TRAP_RESUME_QUEUES: + case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH: + case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH: + case KFD_IOC_DBG_TRAP_SET_FLAGS: + case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: + case 
KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: + case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: + case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: + pr_warn("Debugging not supported yet\n"); + r = -EACCES; + break; + default: + pr_err("Invalid option: %i\n", args->op); + r = -EINVAL; + } + + return r; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -2841,6 +2883,12 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF, kfd_ioctl_export_dmabuf, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_RUNTIME_ENABLE, + kfd_ioctl_runtime_enable, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP, + kfd_ioctl_set_debug_trap, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 2a9671e1ddb5..dfe745ee427e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -110,6 +110,32 @@ struct kfd_ioctl_get_available_memory_args { __u32 pad; }; +struct kfd_dbg_device_info_entry { + __u64 exception_status; + __u64 lds_base; + __u64 lds_limit; + __u64 scratch_base; + __u64 scratch_limit; + __u64 gpuvm_base; + __u64 gpuvm_limit; + __u32 gpu_id; + __u32 location_id; + __u32 vendor_id; + __u32 device_id; + __u32 revision_id; + __u32 subsystem_vendor_id; + __u32 subsystem_device_id; + __u32 fw_version; + __u32 gfx_target_version; + __u32 simd_count; + __u32 max_waves_per_simd; + __u32 array_count; + __u32 simd_arrays_per_engine; + __u32 num_xcc; + __u32 capability; + __u32 debug_prop; +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 @@ -775,6 +801,640 @@ struct kfd_ioctl_set_xnack_mode_args { __s32 xnack_enabled; }; +/* Wave launch override modes */ +enum kfd_dbg_trap_override_mode { + KFD_DBG_TRAP_OVERRIDE_OR = 0, + 
KFD_DBG_TRAP_OVERRIDE_REPLACE = 1 +}; + +/* Wave launch overrides */ +enum kfd_dbg_trap_mask { + KFD_DBG_TRAP_MASK_FP_INVALID = 1, + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL = 2, + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO = 4, + KFD_DBG_TRAP_MASK_FP_OVERFLOW = 8, + KFD_DBG_TRAP_MASK_FP_UNDERFLOW = 16, + KFD_DBG_TRAP_MASK_FP_INEXACT = 32, + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO = 64, +